//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGHLSLRuntime.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/FPAccuracy.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <optional>
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}
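
// Example (informal): under -ftrivial-auto-var-init=zero, a call such as
//
//   void *p = __builtin_alloca(n);
//
// is followed by a memset of n zero bytes over the new allocation, annotated
// with "auto-init" metadata so later passes can recognize the
// compiler-inserted initialization.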

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
  // if it is 64-bit 'long double' mode.
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
      {Builtin::BI__builtin_frexpl, "frexp"},
      {Builtin::BI__builtin_ldexpl, "ldexp"},
      {Builtin::BI__builtin_modfl, "modf"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC,
    // after the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.contains(BuiltinID))
      Name = F128Builtins[BuiltinID];
    else if (getTriple().isOSAIX() &&
             &getTarget().getLongDoubleFormat() ==
                 &llvm::APFloat::IEEEdouble() &&
             AIXLongDouble64Builtins.contains(BuiltinID))
      Name = AIXLongDouble64Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
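
// Example (informal): __builtin_fabsf has its 10-character "__builtin_"
// prefix stripped, yielding a declaration of "fabsf". On a PPC64 target
// whose 'long double' is IEEE quad, __builtin_printf instead maps to
// "__printfieee128" via the table above.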

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
  ASTContext &Ctx = CGF.getContext();
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
  unsigned Align = Ptr.getAlignment().getQuantity();
  if (Align % Bytes != 0) {
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
    // Force address to be at least naturally-aligned.
    return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
  }
  return Ptr;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}
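
// Example (informal): with Kind == AtomicRMWInst::Add, a call such as
//
//   __sync_fetch_and_add(&x, 1);   // int x
//
// lowers roughly to
//
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//
// with %old converted back to the source-level type on return.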

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));

  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));

  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Val);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
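
// Example (informal): this helper backs the "*_and_fetch" builtins, so
// __sync_add_and_fetch(&x, v) re-applies the addition to the value the
// atomicrmw returns (old + v). The Invert flag exists for
// __sync_nand_and_fetch, where the result of the 'and' must additionally be
// bitwise-negated.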

/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: In order to lower Microsoft's _InterlockedCompareExchange*
/// intrinsics, invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Cmp->getType();
  Cmp = EmitToInt(CGF, Cmp, T, IntType);
  Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
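
// Example (informal):
//
//   __sync_bool_compare_and_swap(&x, old, new)  // ReturnBool == true
//   __sync_val_compare_and_swap(&x, old, new)   // ReturnBool == false
//
// both lower to a seq_cst/seq_cst cmpxchg; the former extracts the i1
// success flag, the latter the previous value of x.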

/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
///   T _InterlockedCompareExchange(T volatile *Destination,
///                                 T Exchange,
///                                 T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
///   cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.

static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the
  // volatile marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
      DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}

// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
//
// Note that Destination is assumed to be at least 16-byte aligned, despite
// being typed __int64.

static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));

  assert(DestPtr->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values. Alignment is also overridden for
  // destination pointer.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  Address DestAddr(DestPtr, Int128Ty,
                   CGF.getContext().toCharUnitsFromBits(128));
  ComparandAddr = ComparandAddr.withElementType(Int128Ty);

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the
  // volatile marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandAddr);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}
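
// Example (informal): ExchangeHigh == 0x1 and ExchangeLow == 0x2 are
// recombined as ((i128)1 << 64) | 2 before the 16-byte cmpxchg, matching the
// (((i128)hi) << 64) | ((i128)lo) comment above.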

static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  Address DestAddr = CheckAtomicAlignment(CGF, E);
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(
    CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  Address DestAddr = CheckAtomicAlignment(CGF, E);
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}

// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}
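
// Example (informal): these helpers back MSVC's __iso_volatile_* family;
// __iso_volatile_load32(p) becomes a volatile i32 load of the type's natural
// width that the optimizer may neither drop nor widen.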

static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
                                           llvm::Function *FPBuiltinF,
                                           ArrayRef<Value *> Args,
                                           unsigned ID) {
  llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
  // TODO: Replace AttrList with a single attribute. The call can only have a
  // single FPAccuracy attribute.
  llvm::AttributeList AttrList;
  // "sycl_used_aspects" metadata associated with the call.
  llvm::Metadata *AspectMD = nullptr;
  // sincos() doesn't return a value, but it still has a type associated with
  // it that corresponds to the operand type.
  CGF.CGM.getFPAccuracyFuncAttributes(
      Name, AttrList, AspectMD, ID,
      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
  CI->setAttributes(AttrList);

  if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
    CI->setMetadata("sycl_used_aspects",
                    llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
  return CI;
}

static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
                              unsigned FPIntrinsicID, unsigned IntrinsicID,
                              bool HasAccuracyRequirement) {
  return HasAccuracyRequirement
             ? CGF.CGM.getIntrinsic(FPIntrinsicID, Src0->getType())
             : CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
}

static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
  if (!CGF.getLangOpts().FPAccuracyVal.empty())
    return true;
  auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
  return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
}

static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
                                    unsigned FPAccuracyIntrinsicID,
                                    unsigned IntrinsicID, llvm::Value *Src0,
                                    StringRef &Name) {
  Function *Func = nullptr;
  if (FPAccuracyIntrinsicID != Intrinsic::not_intrinsic) {
    if (!CGF.getLangOpts().FPAccuracyVal.empty() ||
        !CGF.getLangOpts().FPAccuracyFuncMap.empty()) {
      if (CGF.getLangOpts().MathErrno) {
        DiagnosticsEngine &Diags = CGF.CGM.getDiags();
        Diags.Report(E->getBeginLoc(), diag::err_drv_incompatible_options)
            << "-ffp-accuracy"
            << "-fmath-errno";
      } else {
        // Derive the callee's builtin name for the accuracy-requirement
        // lookup below. (This lookup expression is a best-effort
        // reconstruction; the original line was lost in extraction.)
        Name = CGF.CGM.getContext().BuiltinInfo.getName(
            E->getDirectCallee()->getBuiltinID());
        // Use fpbuiltin intrinsic only when needed.
        Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
                            hasAccuracyRequirement(CGF, Name));
      }
    }
  }
  return Func;
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// or an fpbuiltin floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID,
    unsigned ConstrainedIntrinsicID,
    unsigned FPAccuracyIntrinsicID = Intrinsic::not_intrinsic) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  StringRef Name;
  Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                      IntrinsicID, Src0, Name);
  if (Func)
    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
                                     FPAccuracyIntrinsicID);

  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID,
    unsigned ConstrainedIntrinsicID,
    unsigned FPAccuracyIntrinsicID = Intrinsic::not_intrinsic) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  StringRef Name;
  Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                      IntrinsicID, Src0, Name);
  if (Func)
    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
                                     FPAccuracyIntrinsicID);
  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, {Src0, Src1});
  }
}

// Emit an intrinsic whose second argument's type is mangled separately from
// the first (e.g. ldexp-style float-and-int operations).
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID,
    unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1, Src2});
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}

static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               llvm::Intrinsic::ID IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
  llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
  llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);

  llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
  CGF.EmitStoreOfScalar(Exp, LV);

  return CGF.Builder.CreateExtractValue(Call, 0);
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-endian, the high bits in big-endian. Therefore, on big-endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}
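
// Example (informal): for a plain 'double', the sign bit computation becomes
//
//   %bits = bitcast double %x to i64
//   %sign = icmp slt i64 %bits, 0
//
// i.e. a signed compare against zero reads the top (sign) bit.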

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  Call->setMetadata(llvm::LLVMContext::MD_noundef,
                    llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return Call;
}

namespace {
struct WidthAndSignedness {
  unsigned Width;
  bool Signed;
};
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
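
// Example (informal): for {unsigned 32-bit, signed 16-bit} the result is
// signed, and the unsigned 32-bit member then requires one extra bit, so the
// encompassing type is a signed 33-bit integer.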

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
                            ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE,
                                                 bool IsDynamic) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
    ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) {
  const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
      getLangOpts().getStrictFlexArraysLevel();
  uint32_t FieldNo = 0;

  if (RD->isImplicit())
    return nullptr;

  for (const FieldDecl *FD : RD->fields()) {
    if ((Name.empty() || FD->getNameAsString() == Name) &&
        Decl::isFlexibleArrayMemberLike(
            Ctx, FD, FD->getType(), StrictFlexArraysLevel,
            /*IgnoreTemplateOrMacroSubstitution=*/true)) {
      const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
      Offset += Layout.getFieldOffset(FieldNo);
      return FD;
    }

    QualType Ty = FD->getType();
    if (Ty->isRecordType()) {
      if (const FieldDecl *Field = FindFlexibleArrayMemberField(
              Ctx, Ty->getAsRecordDecl(), Name, Offset)) {
        const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
        Offset += Layout.getFieldOffset(FieldNo);
        return Field;
      }
    }

    if (!RD->isUnion())
      ++FieldNo;
  }

  return nullptr;
}

static unsigned CountCountedByAttrs(const RecordDecl *RD) {
  unsigned Num = 0;

  for (const FieldDecl *FD : RD->fields()) {
    if (FD->getType()->isCountAttributedType())
      return ++Num;

    QualType Ty = FD->getType();
    if (Ty->isRecordType())
      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
  }

  return Num;
}

llvm::Value *
CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
                                             llvm::IntegerType *ResType) {
  // The code generated here calculates the size of a struct with a flexible
  // array member that uses the counted_by attribute. There are a few instances
  // we handle:
  //
  //       struct s {
  //         unsigned long flags;
  //         int count;
  //         int array[] __attribute__((counted_by(count)));
  //       }
  //
  //   1) bdos of the flexible array itself:
  //
  //       __builtin_dynamic_object_size(p->array, 1) ==
  //           p->count * sizeof(*p->array)
  //
  //   2) bdos of a pointer into the flexible array:
  //
  //       __builtin_dynamic_object_size(&p->array[42], 1) ==
  //           (p->count - 42) * sizeof(*p->array)
  //
  //   3) bdos of the whole struct, including the flexible array:
  //
  //       __builtin_dynamic_object_size(p, 1) ==
  //          max(sizeof(struct s),
  //              offsetof(struct s, array) + p->count * sizeof(*p->array))
  //
  ASTContext &Ctx = getContext();
  const Expr *Base = E->IgnoreParenImpCasts();
  const Expr *Idx = nullptr;

  if (const auto *UO = dyn_cast<UnaryOperator>(Base);
      UO && UO->getOpcode() == UO_AddrOf) {
    Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
      Base = ASE->getBase()->IgnoreParenImpCasts();
      Idx = ASE->getIdx()->IgnoreParenImpCasts();

      if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
        int64_t Val = IL->getValue().getSExtValue();
        if (Val < 0)
          return getDefaultBuiltinObjectSizeResult(Type, ResType);

        if (Val == 0)
          // The index is 0, so we don't need to take it into account.
          Idx = nullptr;
      }
    } else {
      // Potential pointer to another element in the struct.
      Base = SubExpr;
    }
  }

  // Get the flexible array member Decl.
  const RecordDecl *OuterRD = nullptr;
  std::string FAMName;
  if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
    // Check if \p Base is referencing the FAM itself.
    const ValueDecl *VD = ME->getMemberDecl();
    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
    FAMName = VD->getNameAsString();
  } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
    // Check if we're pointing to the whole struct.
    QualType Ty = DRE->getDecl()->getType();
    if (Ty->isPointerType())
      Ty = Ty->getPointeeType();
    OuterRD = Ty->getAsRecordDecl();

    // If we have a situation like this:
    //
    //     struct union_of_fams {
    //       int flags;
    //       union {
    //         signed char normal_field;
    //         struct {
    //           int count1;
    //           int arr1[] __counted_by(count1);
    //         };
    //         struct {
    //           signed char count2;
    //           int arr2[] __counted_by(count2);
    //         };
    //       };
    //     };
    //
    // We don't know which 'count' to use in this scenario:
    //
    //     size_t get_size(struct union_of_fams *p) {
    //       return __builtin_dynamic_object_size(p, 1);
    //     }
    //
    // Instead of calculating a wrong number, we give up.
    if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
      return nullptr;
  }

  if (!OuterRD)
    return nullptr;

  uint64_t Offset = 0;
  const FieldDecl *FAMDecl =
      FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset);
  Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();

  if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
    // No flexible array member found or it doesn't have the "counted_by"
    // attribute.
    return nullptr;

  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
  if (!CountedByFD)
    // Can't find the field referenced by the "counted_by" attribute.
    return nullptr;

  // Build a load of the counted_by field.
  bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
  Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
  if (!CountedByInst)
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);

  // Build a load of the index and subtract it from the count.
  Value *IdxInst = nullptr;
  if (Idx) {
    if (Idx->HasSideEffects(getContext()))
      // We can't have side-effects.
      return getDefaultBuiltinObjectSizeResult(Type, ResType);

    bool IdxSigned = Idx->getType()->isSignedIntegerType();
    IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
    IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);

    // We go ahead with the calculation here. If the index turns out to be
    // negative, we'll catch it at the end.
    CountedByInst =
        Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
  }

  // Calculate how large the flexible array member is in bytes.
  const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
  llvm::Constant *ElemSize =
      llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
  Value *FAMSize =
      Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
  FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
  Value *Res = FAMSize;

  if (isa<DeclRefExpr>(Base)) {
    // The whole struct is specified in the __bdos.
    const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);

    // Get the offset of the FAM.
    llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
    Value *OffsetAndFAMSize =
        Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);

    // Get the full size of the struct.
    llvm::Constant *SizeofStruct =
        ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);

    // max(sizeof(struct s),
    //     offsetof(struct s, array) + p->count * sizeof(*p->array))
    Res = IsSigned
              ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
                                              OffsetAndFAMSize, SizeofStruct)
              : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
                                              OffsetAndFAMSize, SizeofStruct);
  }

  // A negative \p IdxInst or \p CountedByInst means that the index lands
  // outside of the flexible array member. If that's the case, we want to
  // return 0.
  Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
  if (IdxInst)
    Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);

  return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
}
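
// Worked example (informal): with the struct from the comment above and
// p->count == 10, __builtin_dynamic_object_size(&p->array[4], 1) evaluates
// (10 - 4) * sizeof(int) == 24; if the loaded count or the index is
// negative, the final select yields 0 instead.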

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute
///     on it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  if (IsDynamic) {
    // Emit special code for a flexible array member with the "counted_by"
    // attribute.
    if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
      return V;
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size
  // shouldn't evaluate E for side-effects. In either case, we shouldn't lower
  // to @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

// Returns the first convergence entry/loop/anchor instruction found in |BB|,
// or nullptr otherwise.
llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
  for (auto &I : *BB) {
    auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
    if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
      return II;
  }
  return nullptr;
}

} // namespace

llvm::CallBase *
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
                                            llvm::Value *ParentToken) {
  llvm::Value *bundleArgs[] = {ParentToken};
  llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
  auto Output = llvm::CallBase::addOperandBundle(
      Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
  Input->replaceAllUsesWith(Output);
  Input->eraseFromParent();
  return Output;
}

llvm::IntrinsicInst *
CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
                                          llvm::Value *ParentToken) {
  CGBuilderTy::InsertPoint IP = Builder.saveIP();
  Builder.SetInsertPoint(&BB->front());
  auto CB = Builder.CreateIntrinsic(
      llvm::Intrinsic::experimental_convergence_loop, {}, {});
  Builder.restoreIP(IP);

  auto I = addConvergenceControlToken(CB, ParentToken);
  return cast<llvm::IntrinsicInst>(I);
}

llvm::IntrinsicInst *
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
  auto *BB = &F->getEntryBlock();
  auto *token = getConvergenceToken(BB);
  if (token)
    return token;

  // Adding a convergence token requires the function to be marked as
  // convergent.
  F->setConvergent();

  CGBuilderTy::InsertPoint IP = Builder.saveIP();
  Builder.SetInsertPoint(&BB->front());
  auto I = Builder.CreateIntrinsic(
      llvm::Intrinsic::experimental_convergence_entry, {}, {});
  assert(isa<llvm::IntrinsicInst>(I));
  Builder.restoreIP(IP);

  return cast<llvm::IntrinsicInst>(I);
}

llvm::IntrinsicInst *
CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) {
  assert(LI != nullptr);

  auto *token = getConvergenceToken(LI->getHeader());
  if (token)
    return token;

  llvm::IntrinsicInst *PII =
      LI->getParent()
          ? emitConvergenceLoopToken(
                LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent()))
          : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent());

  return emitConvergenceLoopToken(LI->getHeader(), PII);
}

llvm::CallBase *
CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) {
  llvm::Value *ParentToken =
      LoopStack.hasInfo()
          ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo())
          : getOrEmitConvergenceEntryToken(Input->getFunction());
  return addConvergenceControlToken(Input, ParentToken);
}

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
  // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

  // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

  // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}

static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)";

  // Build the constraints. FIXME: We should support immediates when possible.
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}

static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}
1403 
1404 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1405 /// bits and a bit position and read and optionally modify the bit at that
1406 /// position. The position index can be arbitrarily large, i.e. it can be larger
1407 /// than 31 or 63, so we need an indexed load in the general case.
1408 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1409  unsigned BuiltinID,
1410  const CallExpr *E) {
1411  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1412  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1413 
1414  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1415 
1416  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1417  // indexing operation internally. Use them if possible.
1418  if (CGF.getTarget().getTriple().isX86())
1419  return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1420 
1421  // Otherwise, use generic code to load one byte and test the bit. Use all but
1422  // the bottom three bits as the array index, and the bottom three bits to form
1423  // a mask.
1424  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1425  Value *ByteIndex = CGF.Builder.CreateAShr(
1426  BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1427  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1428  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1429  ByteIndex, "bittest.byteaddr"),
1430  CGF.Int8Ty, CharUnits::One());
1431  Value *PosLow =
1432  CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1433  llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1434 
1435  // The updating instructions will need a mask.
1436  Value *Mask = nullptr;
1437  if (BT.Action != BitTest::TestOnly) {
1438  Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1439  "bittest.mask");
1440  }
1441 
1442  // Check the action and ordering of the interlocked intrinsics.
1443  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1444 
1445  Value *OldByte = nullptr;
1446  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1447  // Emit a combined atomicrmw load/store operation for the interlocked
1448  // intrinsics.
1449  llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1450  if (BT.Action == BitTest::Reset) {
1451  Mask = CGF.Builder.CreateNot(Mask);
1452  RMWOp = llvm::AtomicRMWInst::And;
1453  }
1454  OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1455  } else {
1456  // Emit a plain load for the non-interlocked intrinsics.
1457  OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1458  Value *NewByte = nullptr;
1459  switch (BT.Action) {
1460  case BitTest::TestOnly:
1461  // Don't store anything.
1462  break;
1463  case BitTest::Complement:
1464  NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1465  break;
1466  case BitTest::Reset:
1467  NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1468  break;
1469  case BitTest::Set:
1470  NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1471  break;
1472  }
1473  if (NewByte)
1474  CGF.Builder.CreateStore(NewByte, ByteAddr);
1475  }
1476 
1477  // However we loaded the old byte, either by plain load or atomicrmw, shift
1478  // the bit into the low position and mask it to 0 or 1.
1479  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1480  return CGF.Builder.CreateAnd(
1481  ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1482 }
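// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// A portable C++ model of the generic (non-x86) lowering above, shown for the
// non-interlocked _bittestandset case. Name and types are illustrative only;
// the real MSVC intrinsics take long/__int64 bit bases.
#include <cstdint>
inline uint8_t bittestandset_model(uint8_t *BitBase, int64_t BitPos) {
  uint8_t *ByteAddr = BitBase + (BitPos >> 3);   // "bittest.byteidx"/"byteaddr"
  uint8_t PosLow = uint8_t(BitPos) & 0x7;        // bottom three bits
  uint8_t Mask = uint8_t(1u << PosLow);          // "bittest.mask"
  uint8_t OldByte = *ByteAddr;                   // plain load (not interlocked)
  *ByteAddr = OldByte | Mask;                    // BitTest::Set action
  return (OldByte >> PosLow) & 1;                // "bittest.res": 0 or 1
}
// ---- End sketch ------------------------------------------------------------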
1483 
1484 static Value *EmitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1485  unsigned BuiltinID,
1486  const CallExpr *E) {
1487  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1488 
1489  SmallString<64> Asm;
1490  raw_svector_ostream AsmOS(Asm);
1491  llvm::IntegerType *RetType = CGF.Int32Ty;
1492 
1493  switch (BuiltinID) {
1494  case clang::PPC::BI__builtin_ppc_ldarx:
1495  AsmOS << "ldarx ";
1496  RetType = CGF.Int64Ty;
1497  break;
1498  case clang::PPC::BI__builtin_ppc_lwarx:
1499  AsmOS << "lwarx ";
1500  RetType = CGF.Int32Ty;
1501  break;
1502  case clang::PPC::BI__builtin_ppc_lharx:
1503  AsmOS << "lharx ";
1504  RetType = CGF.Int16Ty;
1505  break;
1506  case clang::PPC::BI__builtin_ppc_lbarx:
1507  AsmOS << "lbarx ";
1508  RetType = CGF.Int8Ty;
1509  break;
1510  default:
1511  llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1512  }
1513 
1514  AsmOS << "$0, ${1:y}";
1515 
1516  std::string Constraints = "=r,*Z,~{memory}";
1517  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1518  if (!MachineClobbers.empty()) {
1519  Constraints += ',';
1520  Constraints += MachineClobbers;
1521  }
1522 
1523  llvm::Type *PtrType = CGF.UnqualPtrTy;
1524  llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1525 
1526  llvm::InlineAsm *IA =
1527  llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1528  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1529  CI->addParamAttr(
1530  0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1531  return CI;
1532 }
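// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Caller-side view of the PowerPC load-reserve builtins lowered above; the
// helper emits inline asm such as "lwarx $0, ${1:y}" with constraints
// "=r,*Z,~{memory}", plus an elementtype parameter attribute so the opaque
// pointer operand still carries its access width. Illustrative usage,
// compilable only when targeting PowerPC:
#if defined(__powerpc64__)
long load_reserve_64(volatile long *P) {
  return __builtin_ppc_ldarx(P); // emits "ldarx $0, ${1:y}"
}
#endif
// ---- End sketch ------------------------------------------------------------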
1533 
1534 namespace {
1535 enum class MSVCSetJmpKind {
1536  _setjmpex,
1537  _setjmp3,
1538  _setjmp
1539 };
1540 }
1541 
1542 /// MSVC handles setjmp a bit differently on different platforms. On every
1543 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
1544 /// parameters can be passed as variadic arguments, but we always pass none.
1545 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1546  const CallExpr *E) {
1547  llvm::Value *Arg1 = nullptr;
1548  llvm::Type *Arg1Ty = nullptr;
1549  StringRef Name;
1550  bool IsVarArg = false;
1551  if (SJKind == MSVCSetJmpKind::_setjmp3) {
1552  Name = "_setjmp3";
1553  Arg1Ty = CGF.Int32Ty;
1554  Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1555  IsVarArg = true;
1556  } else {
1557  Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1558  Arg1Ty = CGF.Int8PtrTy;
1559  if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1560  Arg1 = CGF.Builder.CreateCall(
1561  CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1562  } else
1563  Arg1 = CGF.Builder.CreateCall(
1564  CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1565  llvm::ConstantInt::get(CGF.Int32Ty, 0));
1566  }
1567 
1568  // Mark the call site and declaration with ReturnsTwice.
1569  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1570  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1571  CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1572  llvm::Attribute::ReturnsTwice);
1573  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1574  llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1575  ReturnsTwiceAttr, /*Local=*/true);
1576 
1577  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1578  CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1579  llvm::Value *Args[] = {Buf, Arg1};
1580  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1581  CB->setAttributes(ReturnsTwiceAttr);
1582  return RValue::get(CB);
1583 }
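// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// The runtime call shapes the helper above produces, modeled as declarations
// (illustrative prototypes, not the CRT's actual headers):
extern "C" {
int _setjmp3(void *JumpBuffer, int Count, ...); // 32-bit x86; Count is 0 here
int _setjmpex(void *JumpBuffer, void *Frame);   // elsewhere; Frame comes from
}                                               // llvm.frameaddress(0), or
                                                // llvm.sponentry() on AArch64
// Both the call site and the callee declaration carry `returns_twice`.
// ---- End sketch ------------------------------------------------------------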
1584 
1585 // Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1586 // repeating code, we handle them here.
1587 enum class CodeGenFunction::MSVCIntrin {
1588  _BitScanForward,
1589  _BitScanReverse,
1590  _InterlockedAnd,
1591  _InterlockedDecrement,
1592  _InterlockedExchange,
1593  _InterlockedExchangeAdd,
1594  _InterlockedExchangeSub,
1595  _InterlockedIncrement,
1596  _InterlockedOr,
1597  _InterlockedXor,
1598  _InterlockedExchangeAdd_acq,
1599  _InterlockedExchangeAdd_rel,
1600  _InterlockedExchangeAdd_nf,
1601  _InterlockedExchange_acq,
1602  _InterlockedExchange_rel,
1603  _InterlockedExchange_nf,
1604  _InterlockedCompareExchange_acq,
1605  _InterlockedCompareExchange_rel,
1606  _InterlockedCompareExchange_nf,
1607  _InterlockedCompareExchange128,
1608  _InterlockedCompareExchange128_acq,
1609  _InterlockedCompareExchange128_rel,
1610  _InterlockedCompareExchange128_nf,
1611  _InterlockedOr_acq,
1612  _InterlockedOr_rel,
1613  _InterlockedOr_nf,
1614  _InterlockedXor_acq,
1615  _InterlockedXor_rel,
1616  _InterlockedXor_nf,
1617  _InterlockedAnd_acq,
1618  _InterlockedAnd_rel,
1619  _InterlockedAnd_nf,
1620  _InterlockedIncrement_acq,
1621  _InterlockedIncrement_rel,
1622  _InterlockedIncrement_nf,
1623  _InterlockedDecrement_acq,
1624  _InterlockedDecrement_rel,
1625  _InterlockedDecrement_nf,
1626  __fastfail,
1627 };
1628 
1629 static std::optional<CodeGenFunction::MSVCIntrin>
1630 translateArmToMsvcIntrin(unsigned BuiltinID) {
1631  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1632  switch (BuiltinID) {
1633  default:
1634  return std::nullopt;
1635  case clang::ARM::BI_BitScanForward:
1636  case clang::ARM::BI_BitScanForward64:
1637  return MSVCIntrin::_BitScanForward;
1638  case clang::ARM::BI_BitScanReverse:
1639  case clang::ARM::BI_BitScanReverse64:
1640  return MSVCIntrin::_BitScanReverse;
1641  case clang::ARM::BI_InterlockedAnd64:
1642  return MSVCIntrin::_InterlockedAnd;
1643  case clang::ARM::BI_InterlockedExchange64:
1644  return MSVCIntrin::_InterlockedExchange;
1645  case clang::ARM::BI_InterlockedExchangeAdd64:
1646  return MSVCIntrin::_InterlockedExchangeAdd;
1647  case clang::ARM::BI_InterlockedExchangeSub64:
1648  return MSVCIntrin::_InterlockedExchangeSub;
1649  case clang::ARM::BI_InterlockedOr64:
1650  return MSVCIntrin::_InterlockedOr;
1651  case clang::ARM::BI_InterlockedXor64:
1652  return MSVCIntrin::_InterlockedXor;
1653  case clang::ARM::BI_InterlockedDecrement64:
1654  return MSVCIntrin::_InterlockedDecrement;
1655  case clang::ARM::BI_InterlockedIncrement64:
1656  return MSVCIntrin::_InterlockedIncrement;
1657  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1658  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1659  case clang::ARM::BI_InterlockedExchangeAdd_acq:
1660  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1661  return MSVCIntrin::_InterlockedExchangeAdd_acq;
1662  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1663  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1664  case clang::ARM::BI_InterlockedExchangeAdd_rel:
1665  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1666  return MSVCIntrin::_InterlockedExchangeAdd_rel;
1667  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1668  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1669  case clang::ARM::BI_InterlockedExchangeAdd_nf:
1670  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1671  return MSVCIntrin::_InterlockedExchangeAdd_nf;
1672  case clang::ARM::BI_InterlockedExchange8_acq:
1673  case clang::ARM::BI_InterlockedExchange16_acq:
1674  case clang::ARM::BI_InterlockedExchange_acq:
1675  case clang::ARM::BI_InterlockedExchange64_acq:
1676  return MSVCIntrin::_InterlockedExchange_acq;
1677  case clang::ARM::BI_InterlockedExchange8_rel:
1678  case clang::ARM::BI_InterlockedExchange16_rel:
1679  case clang::ARM::BI_InterlockedExchange_rel:
1680  case clang::ARM::BI_InterlockedExchange64_rel:
1681  return MSVCIntrin::_InterlockedExchange_rel;
1682  case clang::ARM::BI_InterlockedExchange8_nf:
1683  case clang::ARM::BI_InterlockedExchange16_nf:
1684  case clang::ARM::BI_InterlockedExchange_nf:
1685  case clang::ARM::BI_InterlockedExchange64_nf:
1686  return MSVCIntrin::_InterlockedExchange_nf;
1687  case clang::ARM::BI_InterlockedCompareExchange8_acq:
1688  case clang::ARM::BI_InterlockedCompareExchange16_acq:
1689  case clang::ARM::BI_InterlockedCompareExchange_acq:
1690  case clang::ARM::BI_InterlockedCompareExchange64_acq:
1691  return MSVCIntrin::_InterlockedCompareExchange_acq;
1692  case clang::ARM::BI_InterlockedCompareExchange8_rel:
1693  case clang::ARM::BI_InterlockedCompareExchange16_rel:
1694  case clang::ARM::BI_InterlockedCompareExchange_rel:
1695  case clang::ARM::BI_InterlockedCompareExchange64_rel:
1696  return MSVCIntrin::_InterlockedCompareExchange_rel;
1697  case clang::ARM::BI_InterlockedCompareExchange8_nf:
1698  case clang::ARM::BI_InterlockedCompareExchange16_nf:
1699  case clang::ARM::BI_InterlockedCompareExchange_nf:
1700  case clang::ARM::BI_InterlockedCompareExchange64_nf:
1701  return MSVCIntrin::_InterlockedCompareExchange_nf;
1702  case clang::ARM::BI_InterlockedOr8_acq:
1703  case clang::ARM::BI_InterlockedOr16_acq:
1704  case clang::ARM::BI_InterlockedOr_acq:
1705  case clang::ARM::BI_InterlockedOr64_acq:
1706  return MSVCIntrin::_InterlockedOr_acq;
1707  case clang::ARM::BI_InterlockedOr8_rel:
1708  case clang::ARM::BI_InterlockedOr16_rel:
1709  case clang::ARM::BI_InterlockedOr_rel:
1710  case clang::ARM::BI_InterlockedOr64_rel:
1711  return MSVCIntrin::_InterlockedOr_rel;
1712  case clang::ARM::BI_InterlockedOr8_nf:
1713  case clang::ARM::BI_InterlockedOr16_nf:
1714  case clang::ARM::BI_InterlockedOr_nf:
1715  case clang::ARM::BI_InterlockedOr64_nf:
1716  return MSVCIntrin::_InterlockedOr_nf;
1717  case clang::ARM::BI_InterlockedXor8_acq:
1718  case clang::ARM::BI_InterlockedXor16_acq:
1719  case clang::ARM::BI_InterlockedXor_acq:
1720  case clang::ARM::BI_InterlockedXor64_acq:
1721  return MSVCIntrin::_InterlockedXor_acq;
1722  case clang::ARM::BI_InterlockedXor8_rel:
1723  case clang::ARM::BI_InterlockedXor16_rel:
1724  case clang::ARM::BI_InterlockedXor_rel:
1725  case clang::ARM::BI_InterlockedXor64_rel:
1726  return MSVCIntrin::_InterlockedXor_rel;
1727  case clang::ARM::BI_InterlockedXor8_nf:
1728  case clang::ARM::BI_InterlockedXor16_nf:
1729  case clang::ARM::BI_InterlockedXor_nf:
1730  case clang::ARM::BI_InterlockedXor64_nf:
1731  return MSVCIntrin::_InterlockedXor_nf;
1732  case clang::ARM::BI_InterlockedAnd8_acq:
1733  case clang::ARM::BI_InterlockedAnd16_acq:
1734  case clang::ARM::BI_InterlockedAnd_acq:
1735  case clang::ARM::BI_InterlockedAnd64_acq:
1736  return MSVCIntrin::_InterlockedAnd_acq;
1737  case clang::ARM::BI_InterlockedAnd8_rel:
1738  case clang::ARM::BI_InterlockedAnd16_rel:
1739  case clang::ARM::BI_InterlockedAnd_rel:
1740  case clang::ARM::BI_InterlockedAnd64_rel:
1741  return MSVCIntrin::_InterlockedAnd_rel;
1742  case clang::ARM::BI_InterlockedAnd8_nf:
1743  case clang::ARM::BI_InterlockedAnd16_nf:
1744  case clang::ARM::BI_InterlockedAnd_nf:
1745  case clang::ARM::BI_InterlockedAnd64_nf:
1746  return MSVCIntrin::_InterlockedAnd_nf;
1747  case clang::ARM::BI_InterlockedIncrement16_acq:
1748  case clang::ARM::BI_InterlockedIncrement_acq:
1749  case clang::ARM::BI_InterlockedIncrement64_acq:
1750  return MSVCIntrin::_InterlockedIncrement_acq;
1751  case clang::ARM::BI_InterlockedIncrement16_rel:
1752  case clang::ARM::BI_InterlockedIncrement_rel:
1753  case clang::ARM::BI_InterlockedIncrement64_rel:
1754  return MSVCIntrin::_InterlockedIncrement_rel;
1755  case clang::ARM::BI_InterlockedIncrement16_nf:
1756  case clang::ARM::BI_InterlockedIncrement_nf:
1757  case clang::ARM::BI_InterlockedIncrement64_nf:
1758  return MSVCIntrin::_InterlockedIncrement_nf;
1759  case clang::ARM::BI_InterlockedDecrement16_acq:
1760  case clang::ARM::BI_InterlockedDecrement_acq:
1761  case clang::ARM::BI_InterlockedDecrement64_acq:
1762  return MSVCIntrin::_InterlockedDecrement_acq;
1763  case clang::ARM::BI_InterlockedDecrement16_rel:
1764  case clang::ARM::BI_InterlockedDecrement_rel:
1765  case clang::ARM::BI_InterlockedDecrement64_rel:
1766  return MSVCIntrin::_InterlockedDecrement_rel;
1767  case clang::ARM::BI_InterlockedDecrement16_nf:
1768  case clang::ARM::BI_InterlockedDecrement_nf:
1769  case clang::ARM::BI_InterlockedDecrement64_nf:
1770  return MSVCIntrin::_InterlockedDecrement_nf;
1771  }
1772  llvm_unreachable("must return from switch");
1773 }
1774 
1775 static std::optional<CodeGenFunction::MSVCIntrin>
1776 translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1777  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1778  switch (BuiltinID) {
1779  default:
1780  return std::nullopt;
1781  case clang::AArch64::BI_BitScanForward:
1782  case clang::AArch64::BI_BitScanForward64:
1783  return MSVCIntrin::_BitScanForward;
1784  case clang::AArch64::BI_BitScanReverse:
1785  case clang::AArch64::BI_BitScanReverse64:
1786  return MSVCIntrin::_BitScanReverse;
1787  case clang::AArch64::BI_InterlockedAnd64:
1788  return MSVCIntrin::_InterlockedAnd;
1789  case clang::AArch64::BI_InterlockedExchange64:
1790  return MSVCIntrin::_InterlockedExchange;
1791  case clang::AArch64::BI_InterlockedExchangeAdd64:
1792  return MSVCIntrin::_InterlockedExchangeAdd;
1793  case clang::AArch64::BI_InterlockedExchangeSub64:
1794  return MSVCIntrin::_InterlockedExchangeSub;
1795  case clang::AArch64::BI_InterlockedOr64:
1796  return MSVCIntrin::_InterlockedOr;
1797  case clang::AArch64::BI_InterlockedXor64:
1798  return MSVCIntrin::_InterlockedXor;
1799  case clang::AArch64::BI_InterlockedDecrement64:
1800  return MSVCIntrin::_InterlockedDecrement;
1801  case clang::AArch64::BI_InterlockedIncrement64:
1802  return MSVCIntrin::_InterlockedIncrement;
1803  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1804  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1805  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1806  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1807  return MSVCIntrin::_InterlockedExchangeAdd_acq;
1808  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1809  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1810  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1811  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1812  return MSVCIntrin::_InterlockedExchangeAdd_rel;
1813  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1814  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1815  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1816  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1817  return MSVCIntrin::_InterlockedExchangeAdd_nf;
1818  case clang::AArch64::BI_InterlockedExchange8_acq:
1819  case clang::AArch64::BI_InterlockedExchange16_acq:
1820  case clang::AArch64::BI_InterlockedExchange_acq:
1821  case clang::AArch64::BI_InterlockedExchange64_acq:
1822  return MSVCIntrin::_InterlockedExchange_acq;
1823  case clang::AArch64::BI_InterlockedExchange8_rel:
1824  case clang::AArch64::BI_InterlockedExchange16_rel:
1825  case clang::AArch64::BI_InterlockedExchange_rel:
1826  case clang::AArch64::BI_InterlockedExchange64_rel:
1827  return MSVCIntrin::_InterlockedExchange_rel;
1828  case clang::AArch64::BI_InterlockedExchange8_nf:
1829  case clang::AArch64::BI_InterlockedExchange16_nf:
1830  case clang::AArch64::BI_InterlockedExchange_nf:
1831  case clang::AArch64::BI_InterlockedExchange64_nf:
1832  return MSVCIntrin::_InterlockedExchange_nf;
1833  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1834  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1835  case clang::AArch64::BI_InterlockedCompareExchange_acq:
1836  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1837  return MSVCIntrin::_InterlockedCompareExchange_acq;
1838  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1839  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1840  case clang::AArch64::BI_InterlockedCompareExchange_rel:
1841  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1842  return MSVCIntrin::_InterlockedCompareExchange_rel;
1843  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1844  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1845  case clang::AArch64::BI_InterlockedCompareExchange_nf:
1846  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1847  return MSVCIntrin::_InterlockedCompareExchange_nf;
1848  case clang::AArch64::BI_InterlockedCompareExchange128:
1849  return MSVCIntrin::_InterlockedCompareExchange128;
1850  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1851  return MSVCIntrin::_InterlockedCompareExchange128_acq;
1852  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1853  return MSVCIntrin::_InterlockedCompareExchange128_nf;
1854  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1855  return MSVCIntrin::_InterlockedCompareExchange128_rel;
1856  case clang::AArch64::BI_InterlockedOr8_acq:
1857  case clang::AArch64::BI_InterlockedOr16_acq:
1858  case clang::AArch64::BI_InterlockedOr_acq:
1859  case clang::AArch64::BI_InterlockedOr64_acq:
1860  return MSVCIntrin::_InterlockedOr_acq;
1861  case clang::AArch64::BI_InterlockedOr8_rel:
1862  case clang::AArch64::BI_InterlockedOr16_rel:
1863  case clang::AArch64::BI_InterlockedOr_rel:
1864  case clang::AArch64::BI_InterlockedOr64_rel:
1865  return MSVCIntrin::_InterlockedOr_rel;
1866  case clang::AArch64::BI_InterlockedOr8_nf:
1867  case clang::AArch64::BI_InterlockedOr16_nf:
1868  case clang::AArch64::BI_InterlockedOr_nf:
1869  case clang::AArch64::BI_InterlockedOr64_nf:
1870  return MSVCIntrin::_InterlockedOr_nf;
1871  case clang::AArch64::BI_InterlockedXor8_acq:
1872  case clang::AArch64::BI_InterlockedXor16_acq:
1873  case clang::AArch64::BI_InterlockedXor_acq:
1874  case clang::AArch64::BI_InterlockedXor64_acq:
1875  return MSVCIntrin::_InterlockedXor_acq;
1876  case clang::AArch64::BI_InterlockedXor8_rel:
1877  case clang::AArch64::BI_InterlockedXor16_rel:
1878  case clang::AArch64::BI_InterlockedXor_rel:
1879  case clang::AArch64::BI_InterlockedXor64_rel:
1880  return MSVCIntrin::_InterlockedXor_rel;
1881  case clang::AArch64::BI_InterlockedXor8_nf:
1882  case clang::AArch64::BI_InterlockedXor16_nf:
1883  case clang::AArch64::BI_InterlockedXor_nf:
1884  case clang::AArch64::BI_InterlockedXor64_nf:
1885  return MSVCIntrin::_InterlockedXor_nf;
1886  case clang::AArch64::BI_InterlockedAnd8_acq:
1887  case clang::AArch64::BI_InterlockedAnd16_acq:
1888  case clang::AArch64::BI_InterlockedAnd_acq:
1889  case clang::AArch64::BI_InterlockedAnd64_acq:
1890  return MSVCIntrin::_InterlockedAnd_acq;
1891  case clang::AArch64::BI_InterlockedAnd8_rel:
1892  case clang::AArch64::BI_InterlockedAnd16_rel:
1893  case clang::AArch64::BI_InterlockedAnd_rel:
1894  case clang::AArch64::BI_InterlockedAnd64_rel:
1895  return MSVCIntrin::_InterlockedAnd_rel;
1896  case clang::AArch64::BI_InterlockedAnd8_nf:
1897  case clang::AArch64::BI_InterlockedAnd16_nf:
1898  case clang::AArch64::BI_InterlockedAnd_nf:
1899  case clang::AArch64::BI_InterlockedAnd64_nf:
1900  return MSVCIntrin::_InterlockedAnd_nf;
1901  case clang::AArch64::BI_InterlockedIncrement16_acq:
1902  case clang::AArch64::BI_InterlockedIncrement_acq:
1903  case clang::AArch64::BI_InterlockedIncrement64_acq:
1904  return MSVCIntrin::_InterlockedIncrement_acq;
1905  case clang::AArch64::BI_InterlockedIncrement16_rel:
1906  case clang::AArch64::BI_InterlockedIncrement_rel:
1907  case clang::AArch64::BI_InterlockedIncrement64_rel:
1908  return MSVCIntrin::_InterlockedIncrement_rel;
1909  case clang::AArch64::BI_InterlockedIncrement16_nf:
1910  case clang::AArch64::BI_InterlockedIncrement_nf:
1911  case clang::AArch64::BI_InterlockedIncrement64_nf:
1912  return MSVCIntrin::_InterlockedIncrement_nf;
1913  case clang::AArch64::BI_InterlockedDecrement16_acq:
1914  case clang::AArch64::BI_InterlockedDecrement_acq:
1915  case clang::AArch64::BI_InterlockedDecrement64_acq:
1916  return MSVCIntrin::_InterlockedDecrement_acq;
1917  case clang::AArch64::BI_InterlockedDecrement16_rel:
1918  case clang::AArch64::BI_InterlockedDecrement_rel:
1919  case clang::AArch64::BI_InterlockedDecrement64_rel:
1920  return MSVCIntrin::_InterlockedDecrement_rel;
1921  case clang::AArch64::BI_InterlockedDecrement16_nf:
1922  case clang::AArch64::BI_InterlockedDecrement_nf:
1923  case clang::AArch64::BI_InterlockedDecrement64_nf:
1924  return MSVCIntrin::_InterlockedDecrement_nf;
1925  }
1926  llvm_unreachable("must return from switch");
1927 }
1928 
1929 static std::optional<CodeGenFunction::MSVCIntrin>
1930 translateX86ToMsvcIntrin(unsigned BuiltinID) {
1931  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1932  switch (BuiltinID) {
1933  default:
1934  return std::nullopt;
1935  case clang::X86::BI_BitScanForward:
1936  case clang::X86::BI_BitScanForward64:
1937  return MSVCIntrin::_BitScanForward;
1938  case clang::X86::BI_BitScanReverse:
1939  case clang::X86::BI_BitScanReverse64:
1940  return MSVCIntrin::_BitScanReverse;
1941  case clang::X86::BI_InterlockedAnd64:
1942  return MSVCIntrin::_InterlockedAnd;
1943  case clang::X86::BI_InterlockedCompareExchange128:
1944  return MSVCIntrin::_InterlockedCompareExchange128;
1945  case clang::X86::BI_InterlockedExchange64:
1946  return MSVCIntrin::_InterlockedExchange;
1947  case clang::X86::BI_InterlockedExchangeAdd64:
1948  return MSVCIntrin::_InterlockedExchangeAdd;
1949  case clang::X86::BI_InterlockedExchangeSub64:
1950  return MSVCIntrin::_InterlockedExchangeSub;
1951  case clang::X86::BI_InterlockedOr64:
1952  return MSVCIntrin::_InterlockedOr;
1953  case clang::X86::BI_InterlockedXor64:
1954  return MSVCIntrin::_InterlockedXor;
1955  case clang::X86::BI_InterlockedDecrement64:
1956  return MSVCIntrin::_InterlockedDecrement;
1957  case clang::X86::BI_InterlockedIncrement64:
1958  return MSVCIntrin::_InterlockedIncrement;
1959  }
1960  llvm_unreachable("must return from switch");
1961 }
1962 
1963 // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1964 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1965  const CallExpr *E) {
1966  switch (BuiltinID) {
1967  case MSVCIntrin::_BitScanForward:
1968  case MSVCIntrin::_BitScanReverse: {
1969  Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1970  Value *ArgValue = EmitScalarExpr(E->getArg(1));
1971 
1972  llvm::Type *ArgType = ArgValue->getType();
1973  llvm::Type *IndexType = IndexAddress.getElementType();
1974  llvm::Type *ResultType = ConvertType(E->getType());
1975 
1976  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1977  Value *ResZero = llvm::Constant::getNullValue(ResultType);
1978  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1979 
1980  BasicBlock *Begin = Builder.GetInsertBlock();
1981  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1982  Builder.SetInsertPoint(End);
1983  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1984 
1985  Builder.SetInsertPoint(Begin);
1986  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1987  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1988  Builder.CreateCondBr(IsZero, End, NotZero);
1989  Result->addIncoming(ResZero, Begin);
1990 
1991  Builder.SetInsertPoint(NotZero);
1992 
1993  if (BuiltinID == MSVCIntrin::_BitScanForward) {
1994  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1995  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1996  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1997  Builder.CreateStore(ZeroCount, IndexAddress, false);
1998  } else {
1999  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2000  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2001 
2002  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2003  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2004  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2005  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2006  Builder.CreateStore(Index, IndexAddress, false);
2007  }
2008  Builder.CreateBr(End);
2009  Result->addIncoming(ResOne, NotZero);
2010 
2011  Builder.SetInsertPoint(End);
2012  return Result;
2013  }
2014  case MSVCIntrin::_InterlockedAnd:
2015  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2016  case MSVCIntrin::_InterlockedExchange:
2017  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2018  case MSVCIntrin::_InterlockedExchangeAdd:
2019  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2020  case MSVCIntrin::_InterlockedExchangeSub:
2021  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2022  case MSVCIntrin::_InterlockedOr:
2023  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2024  case MSVCIntrin::_InterlockedXor:
2025  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2026  case MSVCIntrin::_InterlockedExchangeAdd_acq:
2027  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2028  AtomicOrdering::Acquire);
2029  case MSVCIntrin::_InterlockedExchangeAdd_rel:
2030  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2031  AtomicOrdering::Release);
2032  case MSVCIntrin::_InterlockedExchangeAdd_nf:
2033  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2034  AtomicOrdering::Monotonic);
2035  case MSVCIntrin::_InterlockedExchange_acq:
2036  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2037  AtomicOrdering::Acquire);
2038  case MSVCIntrin::_InterlockedExchange_rel:
2039  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2040  AtomicOrdering::Release);
2041  case MSVCIntrin::_InterlockedExchange_nf:
2042  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2043  AtomicOrdering::Monotonic);
2044  case MSVCIntrin::_InterlockedCompareExchange_acq:
2045  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2046  case MSVCIntrin::_InterlockedCompareExchange_rel:
2047  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2048  case MSVCIntrin::_InterlockedCompareExchange_nf:
2049  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2050  case MSVCIntrin::_InterlockedCompareExchange128:
2051  return EmitAtomicCmpXchg128ForMSIntrin(
2052  *this, E, AtomicOrdering::SequentiallyConsistent);
2053  case MSVCIntrin::_InterlockedCompareExchange128_acq:
2054  return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2055  case MSVCIntrin::_InterlockedCompareExchange128_rel:
2056  return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2057  case MSVCIntrin::_InterlockedCompareExchange128_nf:
2058  return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2059  case MSVCIntrin::_InterlockedOr_acq:
2060  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2061  AtomicOrdering::Acquire);
2062  case MSVCIntrin::_InterlockedOr_rel:
2063  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2064  AtomicOrdering::Release);
2065  case MSVCIntrin::_InterlockedOr_nf:
2066  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2067  AtomicOrdering::Monotonic);
2068  case MSVCIntrin::_InterlockedXor_acq:
2069  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2070  AtomicOrdering::Acquire);
2071  case MSVCIntrin::_InterlockedXor_rel:
2072  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2073  AtomicOrdering::Release);
2074  case MSVCIntrin::_InterlockedXor_nf:
2075  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2076  AtomicOrdering::Monotonic);
2077  case MSVCIntrin::_InterlockedAnd_acq:
2078  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2079  AtomicOrdering::Acquire);
2080  case MSVCIntrin::_InterlockedAnd_rel:
2081  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2082  AtomicOrdering::Release);
2083  case MSVCIntrin::_InterlockedAnd_nf:
2084  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2085  AtomicOrdering::Monotonic);
2086  case MSVCIntrin::_InterlockedIncrement_acq:
2087  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2088  case MSVCIntrin::_InterlockedIncrement_rel:
2089  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2090  case MSVCIntrin::_InterlockedIncrement_nf:
2091  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2092  case MSVCIntrin::_InterlockedDecrement_acq:
2093  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2094  case MSVCIntrin::_InterlockedDecrement_rel:
2095  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2096  case MSVCIntrin::_InterlockedDecrement_nf:
2097  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2098 
2099  case MSVCIntrin::_InterlockedDecrement:
2100  return EmitAtomicDecrementValue(*this, E);
2101  case MSVCIntrin::_InterlockedIncrement:
2102  return EmitAtomicIncrementValue(*this, E);
2103 
2104  case MSVCIntrin::__fastfail: {
2105  // Request immediate process termination from the kernel. The instruction
2106  // sequences to do this are documented on MSDN:
2107  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2108  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2109  StringRef Asm, Constraints;
2110  switch (ISA) {
2111  default:
2112  ErrorUnsupported(E, "__fastfail call for this architecture");
2113  break;
2114  case llvm::Triple::x86:
2115  case llvm::Triple::x86_64:
2116  Asm = "int $$0x29";
2117  Constraints = "{cx}";
2118  break;
2119  case llvm::Triple::thumb:
2120  Asm = "udf #251";
2121  Constraints = "{r0}";
2122  break;
2123  case llvm::Triple::aarch64:
2124  Asm = "brk #0xF003";
2125  Constraints = "{w0}";
2126  }
2127  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2128  llvm::InlineAsm *IA =
2129  llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2130  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2131  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2132  llvm::Attribute::NoReturn);
2133  llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2134  CI->setAttributes(NoReturnAttr);
2135  return CI;
2136  }
2137  }
2138  llvm_unreachable("Incorrect MSVC intrinsic!");
2139 }
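// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Portable C++ model of the _BitScanForward/_BitScanReverse control flow
// above: return 0 and leave *Index unwritten for a zero input, otherwise
// store the bit index and return 1. C++20 <bit> stands in for cttz/ctlz.
#include <bit>
#include <cstdint>
inline unsigned char BitScanForward_model(uint32_t *Index, uint32_t Mask) {
  if (Mask == 0)
    return 0;                           // "bitscan_end" via the ResZero phi
  *Index = std::countr_zero(Mask);      // cttz: index of lowest set bit
  return 1;
}
inline unsigned char BitScanReverse_model(uint32_t *Index, uint32_t Mask) {
  if (Mask == 0)
    return 0;
  *Index = 31 - std::countl_zero(Mask); // ArgWidth-1 minus ctlz
  return 1;
}
// ---- End sketch ------------------------------------------------------------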
2140 
2141 namespace {
2142 // ARC cleanup for __builtin_os_log_format
2143 struct CallObjCArcUse final : EHScopeStack::Cleanup {
2144  CallObjCArcUse(llvm::Value *object) : object(object) {}
2145  llvm::Value *object;
2146 
2147  void Emit(CodeGenFunction &CGF, Flags flags) override {
2148  CGF.EmitARCIntrinsicUse(object);
2149  }
2150 };
2151 }
2152 
2153 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2154  BuiltinCheckKind Kind) {
2155  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2156  && "Unsupported builtin check kind");
2157 
2158  Value *ArgValue = EmitScalarExpr(E);
2159  if (!SanOpts.has(SanitizerKind::Builtin))
2160  return ArgValue;
2161 
2162  SanitizerScope SanScope(this);
2163  Value *Cond = Builder.CreateICmpNE(
2164  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2165  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2166  SanitizerHandler::InvalidBuiltin,
2167  {EmitCheckSourceLocation(E->getExprLoc()),
2168  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2169  std::nullopt);
2170  return ArgValue;
2171 }
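// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Why the check above exists: __builtin_clz(0) and __builtin_ctz(0) are
// undefined, so -fsanitize=builtin guards the argument with an icmp-ne-zero
// feeding EmitCheck. A source-level equivalent of the guard (illustrative):
#include <cassert>
inline int checked_clz(unsigned X) {
  assert(X != 0 && "zero argument to clz is undefined behavior");
  return __builtin_clz(X);
}
// ---- End sketch ------------------------------------------------------------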
2172 
2173 static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2174  return CGF.Builder.CreateBinaryIntrinsic(
2175  Intrinsic::abs, ArgValue,
2176  ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2177 }
2178 
2179 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2180  bool SanitizeOverflow) {
2181  Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2182 
2183  // Try to eliminate overflow check.
2184  if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2185  if (!VCI->isMinSignedValue())
2186  return EmitAbs(CGF, ArgValue, true);
2187  }
2188 
2189  CodeGenFunction::SanitizerScope SanScope(&CGF);
2190 
2191  Constant *Zero = Constant::getNullValue(ArgValue->getType());
2192  Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2193  Intrinsic::ssub_with_overflow, Zero, ArgValue);
2194  Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2195  Value *NotOverflow = CGF.Builder.CreateNot(
2196  CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2197 
2198  // TODO: support -ftrapv-handler.
2199  if (SanitizeOverflow) {
2200  CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2201  SanitizerHandler::NegateOverflow,
2202  {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2203  CGF.EmitCheckTypeDescriptor(E->getType())},
2204  {ArgValue});
2205  } else
2206  CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2207 
2208  Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2209  return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2210 }
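// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Portable model of the checked-abs lowering above: compute 0 - x with an
// overflow-checked subtract (only INT_MIN overflows), diagnose or trap, then
// select the non-negative value. std::abort() stands in for the sanitizer
// trap/diagnostic paths.
#include <cstdlib>
inline int checked_abs_model(int X) {
  int Negated;
  if (__builtin_sub_overflow(0, X, &Negated)) // ssub.with.overflow
    std::abort();                             // NegateOverflow / trap check
  return X < 0 ? Negated : X;                 // "abscond" select
}
// ---- End sketch ------------------------------------------------------------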
2211 
2212 /// Get the argument type for arguments to os_log_helper.
2213 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2214  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2215  return C.getCanonicalType(UnsignedTy);
2216 }
2217 
2218 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2219  const analyze_os_log::OSLogBufferLayout &Layout,
2220  CharUnits BufferAlignment) {
2221  ASTContext &Ctx = getContext();
2222 
2223  llvm::SmallString<64> Name;
2224  {
2225  raw_svector_ostream OS(Name);
2226  OS << "__os_log_helper";
2227  OS << "_" << BufferAlignment.getQuantity();
2228  OS << "_" << int(Layout.getSummaryByte());
2229  OS << "_" << int(Layout.getNumArgsByte());
2230  for (const auto &Item : Layout.Items)
2231  OS << "_" << int(Item.getSizeByte()) << "_"
2232  << int(Item.getDescriptorByte());
2233  }
2234 
2235  if (llvm::Function *F = CGM.getModule().getFunction(Name))
2236  return F;
2237 
2238  llvm::SmallVector<QualType, 4> ArgTys;
2239  FunctionArgList Args;
2240  Args.push_back(ImplicitParamDecl::Create(
2241  Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2242  ImplicitParamKind::Other));
2243  ArgTys.emplace_back(Ctx.VoidPtrTy);
2244 
2245  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2246  char Size = Layout.Items[I].getSizeByte();
2247  if (!Size)
2248  continue;
2249 
2250  QualType ArgTy = getOSLogArgType(Ctx, Size);
2251  Args.push_back(ImplicitParamDecl::Create(
2252  Ctx, nullptr, SourceLocation(),
2253  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2254  ImplicitParamKind::Other));
2255  ArgTys.emplace_back(ArgTy);
2256  }
2257 
2258  QualType ReturnTy = Ctx.VoidTy;
2259 
2260  // The helper function has linkonce_odr linkage to enable the linker to merge
2261  // identical functions. To ensure the merging always happens, 'noinline' is
2262  // attached to the function when compiling with -Oz.
2263  const CGFunctionInfo &FI =
2264  CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2265  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2266  llvm::Function *Fn = llvm::Function::Create(
2267  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2268  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2269  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2270  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2271  Fn->setDoesNotThrow();
2272 
2273  // Attach 'noinline' at -Oz.
2274  if (CGM.getCodeGenOpts().OptimizeSize == 2)
2275  Fn->addFnAttr(llvm::Attribute::NoInline);
2276 
2277  auto NL = ApplyDebugLocation::CreateEmpty(*this);
2278  StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2279 
2280  // Create a scope with an artificial location for the body of this function.
2281  auto AL = ApplyDebugLocation::CreateArtificial(*this);
2282 
2283  CharUnits Offset;
2284  Address BufAddr = makeNaturalAddressForPointer(
2285  Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2286  BufferAlignment);
2287  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2288  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2289  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2290  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2291 
2292  unsigned I = 1;
2293  for (const auto &Item : Layout.Items) {
2294  Builder.CreateStore(
2295  Builder.getInt8(Item.getDescriptorByte()),
2296  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2297  Builder.CreateStore(
2298  Builder.getInt8(Item.getSizeByte()),
2299  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2300 
2301  CharUnits Size = Item.size();
2302  if (!Size.getQuantity())
2303  continue;
2304 
2305  Address Arg = GetAddrOfLocalVar(Args[I]);
2306  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2307  Addr = Addr.withElementType(Arg.getElementType());
2308  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2309  Offset += Size;
2310  ++I;
2311  }
2312 
2313  FinishFunction();
2314 
2315  return Fn;
2316 }
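// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// The byte layout the helper writes, reconstructed from the stores above
// (the real buffer is an untyped, packed byte stream; names are illustrative):
//
//   offset 0: summary byte
//   offset 1: number-of-args byte
//   per item: descriptor byte, size byte, then `size` bytes of argument data
//
#include <cstdint>
struct OSLogItemHeader_model { // hypothetical mirror of the per-item prefix
  uint8_t Descriptor;          // "argDescriptor" store
  uint8_t Size;                // "argSize" store; 0 means no data follows
};
// ---- End sketch ------------------------------------------------------------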
2317 
2318 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2319  assert(E.getNumArgs() >= 2 &&
2320  "__builtin_os_log_format takes at least 2 arguments");
2321  ASTContext &Ctx = getContext();
2322  analyze_os_log::OSLogBufferLayout Layout;
2323  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2324  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2325  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2326 
2327  // Ignore argument 1, the format string. It is not currently used.
2328  CallArgList Args;
2329  Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2330 
2331  for (const auto &Item : Layout.Items) {
2332  int Size = Item.getSizeByte();
2333  if (!Size)
2334  continue;
2335 
2336  llvm::Value *ArgVal;
2337 
2338  if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2339  uint64_t Val = 0;
2340  for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2341  Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2342  ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2343  } else if (const Expr *TheExpr = Item.getExpr()) {
2344  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2345 
2346  // If a temporary object that requires destruction after the full
2347  // expression is passed, push a lifetime-extended cleanup to extend its
2348  // lifetime to the end of the enclosing block scope.
2349  auto LifetimeExtendObject = [&](const Expr *E) {
2350  E = E->IgnoreParenCasts();
2351  // Extend lifetimes of objects returned by function calls and message
2352  // sends.
2353 
2354  // FIXME: We should do this in other cases in which temporaries are
2355  // created including arguments of non-ARC types (e.g., C++
2356  // temporaries).
2357  if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2358  return true;
2359  return false;
2360  };
2361 
2362  if (TheExpr->getType()->isObjCRetainableType() &&
2363  getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2364  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2365  "Only scalar can be an ObjC retainable type");
2366  if (!isa<Constant>(ArgVal)) {
2367  CleanupKind Cleanup = getARCCleanupKind();
2368  QualType Ty = TheExpr->getType();
2369  RawAddress Alloca = RawAddress::invalid();
2370  RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2371  ArgVal = EmitARCRetain(Ty, ArgVal);
2372  Builder.CreateStore(ArgVal, Addr);
2373  pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2374  CodeGenFunction::destroyARCStrongPrecise,
2375  Cleanup & EHCleanup);
2376 
2377  // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2378  // argument has to be alive.
2379  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2380  pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2381  }
2382  }
2383  } else {
2384  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2385  }
2386 
2387  unsigned ArgValSize =
2388  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2389  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2390  ArgValSize);
2391  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2392  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2393  // If ArgVal has type x86_fp80, zero-extend ArgVal.
2394  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2395  Args.add(RValue::get(ArgVal), ArgTy);
2396  }
2397 
2398  const CGFunctionInfo &FI =
2399  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2400  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2401  Layout, BufAddr.getAlignment());
2402  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2403  return RValue::get(BufAddr, *this);
2404 }
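// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// The MaskKind branch above packs the mask-type string into a little-endian
// 64-bit constant, placing byte I at bit position I*8. A worked example with
// a hypothetical four-byte mask type "priv":
#include <cstdint>
inline uint64_t packMask_model() {
  const char MaskType[] = {'p', 'r', 'i', 'v'};
  uint64_t Val = 0;
  for (unsigned I = 0; I < 4; ++I)
    Val |= uint64_t(uint8_t(MaskType[I])) << (I * 8);
  return Val; // 0x76697270: 'p' lands in the lowest byte
}
// ---- End sketch ------------------------------------------------------------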
2405 
2406 static bool isSpecialUnsignedMultiplySignedResult(
2407  unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2408  WidthAndSignedness ResultInfo) {
2409  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2410  Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2411  !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2412 }
2413 
2414 static RValue EmitCheckedUnsignedMultiplySignedResult(
2415  CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2416  const clang::Expr *Op2, WidthAndSignedness Op2Info,
2417  const clang::Expr *ResultArg, QualType ResultQTy,
2418  WidthAndSignedness ResultInfo) {
2419  assert(isSpecialUnsignedMultiplySignedResult(
2420  Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2421  "Cannot specialize this multiply");
2422 
2423  llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2424  llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2425 
2426  llvm::Value *HasOverflow;
2427  llvm::Value *Result = EmitOverflowIntrinsic(
2428  CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2429 
2430  // The intrinsic call only detects overflow when the result exceeds
2431  // UINT_MAX; since the original builtin had a signed result, we must also
2432  // report an overflow when the result is greater than INT_MAX.
2433  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2434  llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2435 
2436  llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2437  HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2438 
2439  bool isVolatile =
2440  ResultArg->getType()->getPointeeType().isVolatileQualified();
2441  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2442  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2443  isVolatile);
2444  return RValue::get(HasOverflow);
2445 }
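// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Portable model of the special case above: unsigned * unsigned with a signed
// result of the same width. Overflow is the unsigned-multiply overflow OR the
// product exceeding the signed maximum. Illustrative 32-bit stand-in:
#include <cstdint>
inline bool umul_to_signed32_model(uint32_t A, uint32_t B, int32_t *Res) {
  uint32_t Prod;
  bool Overflow = __builtin_mul_overflow(A, B, &Prod); // umul.with.overflow
  Overflow |= Prod > uint32_t(INT32_MAX);              // > INT_MAX check
  *Res = int32_t(Prod);                                // stored regardless
  return Overflow;
}
// ---- End sketch ------------------------------------------------------------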
2446 
2447 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
2448 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2449  WidthAndSignedness Op1Info,
2450  WidthAndSignedness Op2Info,
2451  WidthAndSignedness ResultInfo) {
2452  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2453  std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2454  Op1Info.Signed != Op2Info.Signed;
2455 }
2456 
2457 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2458 /// the generic checked-binop irgen.
2459 static RValue
2460 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2461  WidthAndSignedness Op1Info, const clang::Expr *Op2,
2462  WidthAndSignedness Op2Info,
2463  const clang::Expr *ResultArg, QualType ResultQTy,
2464  WidthAndSignedness ResultInfo) {
2465  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2466  Op2Info, ResultInfo) &&
2467  "Not a mixed-sign multiplication we can specialize");
2468 
2469  // Emit the signed and unsigned operands.
2470  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2471  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2472  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2473  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2474  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2475  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2476 
2477  // One of the operands may be smaller than the other. If so, [s|z]ext it.
2478  if (SignedOpWidth < UnsignedOpWidth)
2479  Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2480  if (UnsignedOpWidth < SignedOpWidth)
2481  Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2482 
2483  llvm::Type *OpTy = Signed->getType();
2484  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2485  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2486  llvm::Type *ResTy = ResultPtr.getElementType();
2487  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2488 
2489  // Take the absolute value of the signed operand.
2490  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2491  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2492  llvm::Value *AbsSigned =
2493  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2494 
2495  // Perform a checked unsigned multiplication.
2496  llvm::Value *UnsignedOverflow;
2497  llvm::Value *UnsignedResult =
2498  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2499  Unsigned, UnsignedOverflow);
2500 
2501  llvm::Value *Overflow, *Result;
2502  if (ResultInfo.Signed) {
2503  // Signed overflow occurs if the result is greater than INT_MAX or less
2504  // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2505  auto IntMax =
2506  llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2507  llvm::Value *MaxResult =
2508  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2509  CGF.Builder.CreateZExt(IsNegative, OpTy));
2510  llvm::Value *SignedOverflow =
2511  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2512  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2513 
2514  // Prepare the signed result (possibly by negating it).
2515  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2516  llvm::Value *SignedResult =
2517  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2518  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2519  } else {
2520  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2521  llvm::Value *Underflow = CGF.Builder.CreateAnd(
2522  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2523  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2524  if (ResultInfo.Width < OpWidth) {
2525  auto IntMax =
2526  llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2527  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2528  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2529  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2530  }
2531 
2532  // Negate the product if it would be negative in infinite precision.
2533  Result = CGF.Builder.CreateSelect(
2534  IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2535 
2536  Result = CGF.Builder.CreateTrunc(Result, ResTy);
2537  }
2538  assert(Overflow && Result && "Missing overflow or result");
2539 
2540  bool isVolatile =
2541  ResultArg->getType()->getPointeeType().isVolatileQualified();
2542  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2543  isVolatile);
2544  return RValue::get(Overflow);
2545 }
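// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Portable model of the mixed-sign path above, for signed * unsigned with a
// same-width signed result: multiply |signed| by the unsigned operand, then
// compare the magnitude against INT_MAX + IsNegative (one extra magnitude is
// allowed when negating, since INT_MIN is representable).
#include <cstdint>
inline bool mul_mixed32_model(int32_t S, uint32_t U, int32_t *Res) {
  bool IsNegative = S < 0;
  uint32_t AbsS = IsNegative ? 0u - uint32_t(S) : uint32_t(S);
  uint32_t Prod;
  bool Overflow = __builtin_mul_overflow(AbsS, U, &Prod);
  Overflow |= Prod > uint32_t(INT32_MAX) + (IsNegative ? 1u : 0u);
  *Res = IsNegative ? int32_t(0u - Prod) : int32_t(Prod);
  return Overflow;
}
// ---- End sketch ------------------------------------------------------------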
2546 
2547 static bool
2548 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2549  llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2550  if (const auto *Arr = Ctx.getAsArrayType(Ty))
2551  Ty = Ctx.getBaseElementType(Arr);
2552 
2553  const auto *Record = Ty->getAsCXXRecordDecl();
2554  if (!Record)
2555  return false;
2556 
2557  // We've already checked this type, or are in the process of checking it.
2558  if (!Seen.insert(Record).second)
2559  return false;
2560 
2561  assert(Record->hasDefinition() &&
2562  "Incomplete types should already be diagnosed");
2563 
2564  if (Record->isDynamicClass())
2565  return true;
2566 
2567  for (FieldDecl *F : Record->fields()) {
2568  if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2569  return true;
2570  }
2571  return false;
2572 }
2573 
2574 /// Determine if the specified type requires laundering by checking if it is a
2575 /// dynamic class type or contains a subobject which is a dynamic class type.
2576 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2577  if (!CGM.getCodeGenOpts().StrictVTablePointers)
2578  return false;
2579  llvm::SmallPtrSet<const Decl *, 16> Seen;
2580  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2581 }
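// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// When laundering matters: under -fstrict-vtable-pointers, a dynamic-class
// object recreated in the same storage needs __builtin_launder so vptr loads
// are not folded across the placement-new. Illustrative usage:
#include <new>
struct Dynamic { virtual int f() { return 1; } };
inline int reuse_storage(Dynamic *P) {
  P->~Dynamic();
  new (P) Dynamic;                  // new object, same storage
  return __builtin_launder(P)->f(); // forces a fresh vptr load
}
// ---- End sketch ------------------------------------------------------------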
2582 
2583 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2584  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2585  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2586 
2587  // The builtin's shift arg may have a different type than the source arg and
2588  // result, but the LLVM intrinsic uses the same type for all values.
2589  llvm::Type *Ty = Src->getType();
2590  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2591 
2592  // Rotate is a special case of LLVM funnel shift - the first two args are the same.
2593  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2594  Function *F = CGM.getIntrinsic(IID, Ty);
2595  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2596 }
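// ---- Editorial sketch (not part of CGBuiltin.cpp) --------------------------
// Rotate as a funnel shift with both inputs equal: fshl(x, x, s) computes
// (x << (s % w)) | (x >> ((w - s) % w)). A portable model of the left rotate:
#include <cstdint>
inline uint32_t rotl32_model(uint32_t X, uint32_t S) {
  S &= 31; // the intrinsic reduces the shift amount modulo the bit width
  return S ? (X << S) | (X >> (32 - S)) : X;
}
// ---- End sketch ------------------------------------------------------------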
2597 
2598 // Map long-double math builtins to their f128 versions.
2599 static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2600  switch (BuiltinID) {
2601 #define MUTATE_LDBL(func) \
2602  case Builtin::BI__builtin_##func##l: \
2603  return Builtin::BI__builtin_##func##f128;
2604  MUTATE_LDBL(sqrt)
2605  MUTATE_LDBL(cbrt)
2606  MUTATE_LDBL(fabs)
2607  MUTATE_LDBL(log)
2608  MUTATE_LDBL(log2)
2609  MUTATE_LDBL(log10)
2610  MUTATE_LDBL(log1p)
2611  MUTATE_LDBL(logb)
2612  MUTATE_LDBL(exp)
2613  MUTATE_LDBL(exp2)
2614  MUTATE_LDBL(expm1)
2615  MUTATE_LDBL(fdim)
2616  MUTATE_LDBL(hypot)
2617  MUTATE_LDBL(ilogb)
2618  MUTATE_LDBL(pow)
2619  MUTATE_LDBL(fmin)
2620  MUTATE_LDBL(fmax)
2621  MUTATE_LDBL(ceil)
2622  MUTATE_LDBL(floor)
2623  MUTATE_LDBL(rint)
2624  MUTATE_LDBL(nearbyint)
2625  MUTATE_LDBL(lrint)
2626  MUTATE_LDBL(llrint)
2627  MUTATE_LDBL(lround)
2628  MUTATE_LDBL(llround)
2629  MUTATE_LDBL(round)
2630  MUTATE_LDBL(trunc)
2631  MUTATE_LDBL(fmod)
2632  MUTATE_LDBL(modf)
2633  MUTATE_LDBL(nan)
2634  MUTATE_LDBL(nans)
2635  MUTATE_LDBL(inf)
2636  MUTATE_LDBL(fma)
2637  MUTATE_LDBL(sin)
2638  MUTATE_LDBL(cos)
2639  MUTATE_LDBL(tan)
2640  MUTATE_LDBL(sinh)
2641  MUTATE_LDBL(cosh)
2642  MUTATE_LDBL(tanh)
2643  MUTATE_LDBL(asin)
2644  MUTATE_LDBL(acos)
2645  MUTATE_LDBL(atan)
2646  MUTATE_LDBL(asinh)
2647  MUTATE_LDBL(acosh)
2648  MUTATE_LDBL(atanh)
2649  MUTATE_LDBL(atan2)
2650  MUTATE_LDBL(erf)
2651  MUTATE_LDBL(erfc)
2652  MUTATE_LDBL(ldexp)
2653  MUTATE_LDBL(frexp)
2654  MUTATE_LDBL(huge_val)
2655  MUTATE_LDBL(copysign)
2656  MUTATE_LDBL(nextafter)
2657  MUTATE_LDBL(nexttoward)
2658  MUTATE_LDBL(remainder)
2659  MUTATE_LDBL(remquo)
2660  MUTATE_LDBL(scalbln)
2661  MUTATE_LDBL(scalbn)
2662  MUTATE_LDBL(tgamma)
2663  MUTATE_LDBL(lgamma)
2664 #undef MUTATE_LDBL
2665  default:
2666  return BuiltinID;
2667  }
2668 }
2669 
2670 static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2671  Value *V) {
2672  if (CGF.Builder.getIsFPConstrained() &&
2673  CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2674  if (Value *Result =
2675  CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2676  return Result;
2677  }
2678  return nullptr;
2679 }
2680 
2681 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2682  const FunctionDecl *FD) {
2683  auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2684  auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2685  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2686 
2687  SmallVector<Value *, 16> Args;
2688  for (auto &&FormalTy : FnTy->params())
2689  Args.push_back(llvm::PoisonValue::get(FormalTy));
2690 
2691  return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2692 }
2693 
2694 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2695  const CallExpr *E,
2696  ReturnValueSlot ReturnValue) {
2697  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2698  // See if we can constant fold this builtin. If so, don't emit it at all.
2699  // TODO: Extend this handling to all builtin calls that we can constant-fold.
2700  Expr::EvalResult Result;
2701  if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2702  !Result.hasSideEffects()) {
2703  if (Result.Val.isInt())
2704  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2705  Result.Val.getInt()));
2706  if (Result.Val.isFloat())
2707  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2708  Result.Val.getFloat()));
2709  }
2710 
2711  CurrentBuiltinIDRAII CB(*this, BuiltinID);
2712 
2713  // If the current long-double semantics is IEEE 128-bit, replace math
2714  // builtins of long-double with their f128 equivalents.
2715  // TODO: This mutation should also be applied to targets other than PPC,
2716  // after the backend supports IEEE 128-bit style libcalls.
2717  if (getTarget().getTriple().isPPC64() &&
2718  &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2719  BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2720 
2721  // If the builtin has been declared explicitly with an assembler label,
2722  // disable the specialized emitting below. Ideally we should communicate the
2723  // rename in IR, or at least avoid generating the intrinsic calls that are
2724  // likely to get lowered to the renamed library functions.
2725  const unsigned BuiltinIDIfNoAsmLabel =
2726  FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2727 
2728  std::optional<bool> ErrnoOverriden;
2729  // ErrnoOverriden is true if math-errno is overridden via the
2730  // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2731  // which implies math-errno.
2732  if (E->hasStoredFPFeatures()) {
2733  FPOptionsOverride OP = E->getFPFeatures();
2734  if (OP.hasMathErrnoOverride())
2735  ErrnoOverriden = OP.getMathErrnoOverride();
2736  }
2737  // True if '__attribute__((optnone))' is used. This attribute overrides
2738  // fast-math, which implies math-errno.
2739  bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2740 
2741  // True if we are compiling with optimization and errno has been disabled
2742  // using the '#pragma float_control(precise, off)', and
2743  // attribute opt-none hasn't been seen.
2744  bool ErrnoOverridenToFalseWithOpt =
2745  ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2746  CGM.getCodeGenOpts().OptimizationLevel != 0;
2747 
2748  // There are LLVM math intrinsics/instructions corresponding to math library
2749  // functions, except that the LLVM op will never set errno while the math library
2750  // might. Also, math builtins have the same semantics as their math library
2751  // twins. Thus, we can transform math library and builtin calls to their
2752  // LLVM counterparts if the call is marked 'const' (known to never set errno).
2753  // In case FP exceptions are enabled, the experimental versions of the
2754  // intrinsics model those.
2755  bool ConstAlways =
2756  getContext().BuiltinInfo.isConst(BuiltinID);
2757 
2758  // There's a special case with the fma builtins where they are always const
2759  // if the target environment is GNU or the target OS is Windows and we're
2760  // targeting the MSVCRT.dll environment.
2761  // FIXME: This list can become outdated. Need to find a way to get it some
2762  // other way.
2763  switch (BuiltinID) {
2764  case Builtin::BI__builtin_fma:
2765  case Builtin::BI__builtin_fmaf:
2766  case Builtin::BI__builtin_fmal:
2767  case Builtin::BIfma:
2768  case Builtin::BIfmaf:
2769  case Builtin::BIfmal: {
2770  auto &Trip = CGM.getTriple();
2771  if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2772  ConstAlways = true;
2773  break;
2774  }
2775  default:
2776  break;
2777  }
2778 
2779  bool ConstWithoutErrnoAndExceptions =
2780  getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2781  bool ConstWithoutExceptions =
2782  getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2783 
2784  // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2785  // disabled.
2786  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2787  // or attributes that affect math-errno should prevent or allow math
2788  // intrinsics to be generated. Intrinsics are generated:
2789  // 1- In fast math mode, unless math-errno is overridden
2790  // via '#pragma float_control(precise, on)', or via an
2791  // '__attribute__((optnone))'.
2792  // 2- If math-errno was enabled on the command line but overridden
2793  // to false via '#pragma float_control(precise, off)' and
2794  // '__attribute__((optnone))' hasn't been used.
2795  // 3- If we are compiling with optimization and errno has been disabled
2796  // via '#pragma float_control(precise, off)', and
2797  // '__attribute__((optnone))' hasn't been used.
2798 
2799  bool ConstWithoutErrnoOrExceptions =
2800  ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2801  bool GenerateIntrinsics =
2802  (ConstAlways && !OptNone) ||
2803  (!getLangOpts().MathErrno &&
2804  !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2805  if (!GenerateIntrinsics) {
2806  GenerateIntrinsics =
2807  ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2808  if (!GenerateIntrinsics)
2809  GenerateIntrinsics =
2810  ConstWithoutErrnoOrExceptions &&
2811  (!getLangOpts().MathErrno &&
2812  !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2813  if (!GenerateIntrinsics)
2814  GenerateIntrinsics =
2815  ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2816  }
2817  if (GenerateIntrinsics &&
2818  !(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
2819  switch (BuiltinIDIfNoAsmLabel) {
2820  case Builtin::BIceil:
2821  case Builtin::BIceilf:
2822  case Builtin::BIceill:
2823  case Builtin::BI__builtin_ceil:
2824  case Builtin::BI__builtin_ceilf:
2825  case Builtin::BI__builtin_ceilf16:
2826  case Builtin::BI__builtin_ceill:
2827  case Builtin::BI__builtin_ceilf128:
2828  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2829  Intrinsic::ceil,
2830  Intrinsic::experimental_constrained_ceil));
2831 
2832  case Builtin::BIcopysign:
2833  case Builtin::BIcopysignf:
2834  case Builtin::BIcopysignl:
2835  case Builtin::BI__builtin_copysign:
2836  case Builtin::BI__builtin_copysignf:
2837  case Builtin::BI__builtin_copysignf16:
2838  case Builtin::BI__builtin_copysignl:
2839  case Builtin::BI__builtin_copysignf128:
2840  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2841 
2842  case Builtin::BIcos:
2843  case Builtin::BIcosf:
2844  case Builtin::BIcosl:
2845  case Builtin::BI__builtin_cos:
2846  case Builtin::BI__builtin_cosf:
2847  case Builtin::BI__builtin_cosf16:
2848  case Builtin::BI__builtin_cosl:
2849  case Builtin::BI__builtin_cosf128:
2850  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2851  *this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos,
2852  Intrinsic::fpbuiltin_cos));
2853 
2854  case Builtin::BIexp:
2855  case Builtin::BIexpf:
2856  case Builtin::BIexpl:
2857  case Builtin::BI__builtin_exp:
2858  case Builtin::BI__builtin_expf:
2859  case Builtin::BI__builtin_expf16:
2860  case Builtin::BI__builtin_expl:
2861  case Builtin::BI__builtin_expf128:
2862  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2863  *this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp,
2864  Intrinsic::fpbuiltin_exp));
2865 
2866  case Builtin::BIexp2:
2867  case Builtin::BIexp2f:
2868  case Builtin::BIexp2l:
2869  case Builtin::BI__builtin_exp2:
2870  case Builtin::BI__builtin_exp2f:
2871  case Builtin::BI__builtin_exp2f16:
2872  case Builtin::BI__builtin_exp2l:
2873  case Builtin::BI__builtin_exp2f128:
2874  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2875  *this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2,
2876  Intrinsic::fpbuiltin_exp2));
2877  case Builtin::BI__builtin_exp10:
2878  case Builtin::BI__builtin_exp10f:
2879  case Builtin::BI__builtin_exp10f16:
2880  case Builtin::BI__builtin_exp10l:
2881  case Builtin::BI__builtin_exp10f128: {
2882  // TODO: strictfp support
2883  if (Builder.getIsFPConstrained())
2884  break;
2885  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2886  }
2887  case Builtin::BIfabs:
2888  case Builtin::BIfabsf:
2889  case Builtin::BIfabsl:
2890  case Builtin::BI__builtin_fabs:
2891  case Builtin::BI__builtin_fabsf:
2892  case Builtin::BI__builtin_fabsf16:
2893  case Builtin::BI__builtin_fabsl:
2894  case Builtin::BI__builtin_fabsf128:
2895  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2896 
2897  case Builtin::BIfloor:
2898  case Builtin::BIfloorf:
2899  case Builtin::BIfloorl:
2900  case Builtin::BI__builtin_floor:
2901  case Builtin::BI__builtin_floorf:
2902  case Builtin::BI__builtin_floorf16:
2903  case Builtin::BI__builtin_floorl:
2904  case Builtin::BI__builtin_floorf128:
2905  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2906  Intrinsic::floor,
2907  Intrinsic::experimental_constrained_floor));
2908 
2909  case Builtin::BIfma:
2910  case Builtin::BIfmaf:
2911  case Builtin::BIfmal:
2912  case Builtin::BI__builtin_fma:
2913  case Builtin::BI__builtin_fmaf:
2914  case Builtin::BI__builtin_fmaf16:
2915  case Builtin::BI__builtin_fmal:
2916  case Builtin::BI__builtin_fmaf128:
2917  return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2918  Intrinsic::fma,
2919  Intrinsic::experimental_constrained_fma));
2920 
2921  case Builtin::BIfmax:
2922  case Builtin::BIfmaxf:
2923  case Builtin::BIfmaxl:
2924  case Builtin::BI__builtin_fmax:
2925  case Builtin::BI__builtin_fmaxf:
2926  case Builtin::BI__builtin_fmaxf16:
2927  case Builtin::BI__builtin_fmaxl:
2928  case Builtin::BI__builtin_fmaxf128:
2929  return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2930  Intrinsic::maxnum,
2931  Intrinsic::experimental_constrained_maxnum));
2932 
2933  case Builtin::BIfmin:
2934  case Builtin::BIfminf:
2935  case Builtin::BIfminl:
2936  case Builtin::BI__builtin_fmin:
2937  case Builtin::BI__builtin_fminf:
2938  case Builtin::BI__builtin_fminf16:
2939  case Builtin::BI__builtin_fminl:
2940  case Builtin::BI__builtin_fminf128:
2941  return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2942  Intrinsic::minnum,
2943  Intrinsic::experimental_constrained_minnum));
2944 
2945  // fmod() is a special case. It maps to the frem instruction rather than an
2946  // LLVM intrinsic.
2947  case Builtin::BIfmod:
2948  case Builtin::BIfmodf:
2949  case Builtin::BIfmodl:
2950  case Builtin::BI__builtin_fmod:
2951  case Builtin::BI__builtin_fmodf:
2952  case Builtin::BI__builtin_fmodf16:
2953  case Builtin::BI__builtin_fmodl:
2954  case Builtin::BI__builtin_fmodf128: {
2955  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2956  Value *Arg1 = EmitScalarExpr(E->getArg(0));
2957  Value *Arg2 = EmitScalarExpr(E->getArg(1));
2958  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2959  }
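// Example (illustrative): for doubles, 'fmod(x, y)' above becomes a plain
// '%fmod = frem double %x, %y' instruction; unlike the cases around it,
// there is no 'llvm.fmod.*' intrinsic to target.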
2960 
2961  case Builtin::BIlog:
2962  case Builtin::BIlogf:
2963  case Builtin::BIlogl:
2964  case Builtin::BI__builtin_log:
2965  case Builtin::BI__builtin_logf:
2966  case Builtin::BI__builtin_logf16:
2967  case Builtin::BI__builtin_logl:
2968  case Builtin::BI__builtin_logf128:
2969  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2970  *this, E, Intrinsic::log, Intrinsic::experimental_constrained_log,
2971  Intrinsic::fpbuiltin_log));
2972 
2973  case Builtin::BIlog10:
2974  case Builtin::BIlog10f:
2975  case Builtin::BIlog10l:
2976  case Builtin::BI__builtin_log10:
2977  case Builtin::BI__builtin_log10f:
2978  case Builtin::BI__builtin_log10f16:
2979  case Builtin::BI__builtin_log10l:
2980  case Builtin::BI__builtin_log10f128:
2981  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2982  *this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10,
2983  Intrinsic::fpbuiltin_log10));
2984 
2985  case Builtin::BIlog2:
2986  case Builtin::BIlog2f:
2987  case Builtin::BIlog2l:
2988  case Builtin::BI__builtin_log2:
2989  case Builtin::BI__builtin_log2f:
2990  case Builtin::BI__builtin_log2f16:
2991  case Builtin::BI__builtin_log2l:
2992  case Builtin::BI__builtin_log2f128:
2993  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2994  *this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2,
2995  Intrinsic::fpbuiltin_log2));
2996 
2997  case Builtin::BInearbyint:
2998  case Builtin::BInearbyintf:
2999  case Builtin::BInearbyintl:
3000  case Builtin::BI__builtin_nearbyint:
3001  case Builtin::BI__builtin_nearbyintf:
3002  case Builtin::BI__builtin_nearbyintl:
3003  case Builtin::BI__builtin_nearbyintf128:
3004  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3005  Intrinsic::nearbyint,
3006  Intrinsic::experimental_constrained_nearbyint));
3007 
3008  case Builtin::BIpow:
3009  case Builtin::BIpowf:
3010  case Builtin::BIpowl:
3011  case Builtin::BI__builtin_pow:
3012  case Builtin::BI__builtin_powf:
3013  case Builtin::BI__builtin_powf16:
3014  case Builtin::BI__builtin_powl:
3015  case Builtin::BI__builtin_powf128:
3016  return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
3017  *this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow,
3018  Intrinsic::fpbuiltin_pow));
3019 
3020  case Builtin::BIrint:
3021  case Builtin::BIrintf:
3022  case Builtin::BIrintl:
3023  case Builtin::BI__builtin_rint:
3024  case Builtin::BI__builtin_rintf:
3025  case Builtin::BI__builtin_rintf16:
3026  case Builtin::BI__builtin_rintl:
3027  case Builtin::BI__builtin_rintf128:
3028  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3029  Intrinsic::rint,
3030  Intrinsic::experimental_constrained_rint));
3031 
3032  case Builtin::BIround:
3033  case Builtin::BIroundf:
3034  case Builtin::BIroundl:
3035  case Builtin::BI__builtin_round:
3036  case Builtin::BI__builtin_roundf:
3037  case Builtin::BI__builtin_roundf16:
3038  case Builtin::BI__builtin_roundl:
3039  case Builtin::BI__builtin_roundf128:
3040  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3041  Intrinsic::round,
3042  Intrinsic::experimental_constrained_round));
3043 
3044  case Builtin::BIroundeven:
3045  case Builtin::BIroundevenf:
3046  case Builtin::BIroundevenl:
3047  case Builtin::BI__builtin_roundeven:
3048  case Builtin::BI__builtin_roundevenf:
3049  case Builtin::BI__builtin_roundevenf16:
3050  case Builtin::BI__builtin_roundevenl:
3051  case Builtin::BI__builtin_roundevenf128:
3052  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3053  Intrinsic::roundeven,
3054  Intrinsic::experimental_constrained_roundeven));
3055 
3056  case Builtin::BIsin:
3057  case Builtin::BIsinf:
3058  case Builtin::BIsinl:
3059  case Builtin::BI__builtin_sin:
3060  case Builtin::BI__builtin_sinf:
3061  case Builtin::BI__builtin_sinf16:
3062  case Builtin::BI__builtin_sinl:
3063  case Builtin::BI__builtin_sinf128:
3064  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3065  *this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin,
3066  Intrinsic::fpbuiltin_sin));
3067 
3068  case Builtin::BIsqrt:
3069  case Builtin::BIsqrtf:
3070  case Builtin::BIsqrtl:
3071  case Builtin::BI__builtin_sqrt:
3072  case Builtin::BI__builtin_sqrtf:
3073  case Builtin::BI__builtin_sqrtf16:
3074  case Builtin::BI__builtin_sqrtl:
3075  case Builtin::BI__builtin_sqrtf128:
3076  case Builtin::BI__builtin_elementwise_sqrt: {
3077  llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3078  *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt, Intrinsic::fpbuiltin_sqrt);
3079  SetSqrtFPAccuracy(Call);
3080  return RValue::get(Call);
3081  }
3082  case Builtin::BItrunc:
3083  case Builtin::BItruncf:
3084  case Builtin::BItruncl:
3085  case Builtin::BI__builtin_trunc:
3086  case Builtin::BI__builtin_truncf:
3087  case Builtin::BI__builtin_truncf16:
3088  case Builtin::BI__builtin_truncl:
3089  case Builtin::BI__builtin_truncf128:
3090  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3091  Intrinsic::trunc,
3092  Intrinsic::experimental_constrained_trunc));
3093 
3094  case Builtin::BIlround:
3095  case Builtin::BIlroundf:
3096  case Builtin::BIlroundl:
3097  case Builtin::BI__builtin_lround:
3098  case Builtin::BI__builtin_lroundf:
3099  case Builtin::BI__builtin_lroundl:
3100  case Builtin::BI__builtin_lroundf128:
3101  return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3102  *this, E, Intrinsic::lround,
3103  Intrinsic::experimental_constrained_lround));
3104 
3105  case Builtin::BIllround:
3106  case Builtin::BIllroundf:
3107  case Builtin::BIllroundl:
3108  case Builtin::BI__builtin_llround:
3109  case Builtin::BI__builtin_llroundf:
3110  case Builtin::BI__builtin_llroundl:
3111  case Builtin::BI__builtin_llroundf128:
3112  return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3113  *this, E, Intrinsic::llround,
3114  Intrinsic::experimental_constrained_llround));
3115 
3116  case Builtin::BIlrint:
3117  case Builtin::BIlrintf:
3118  case Builtin::BIlrintl:
3119  case Builtin::BI__builtin_lrint:
3120  case Builtin::BI__builtin_lrintf:
3121  case Builtin::BI__builtin_lrintl:
3122  case Builtin::BI__builtin_lrintf128:
3123  return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3124  *this, E, Intrinsic::lrint,
3125  Intrinsic::experimental_constrained_lrint));
3126 
3127  case Builtin::BIllrint:
3128  case Builtin::BIllrintf:
3129  case Builtin::BIllrintl:
3130  case Builtin::BI__builtin_llrint:
3131  case Builtin::BI__builtin_llrintf:
3132  case Builtin::BI__builtin_llrintl:
3133  case Builtin::BI__builtin_llrintf128:
3134  return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3135  *this, E, Intrinsic::llrint,
3136  Intrinsic::experimental_constrained_llrint));
3137  case Builtin::BI__builtin_ldexp:
3138  case Builtin::BI__builtin_ldexpf:
3139  case Builtin::BI__builtin_ldexpl:
3140  case Builtin::BI__builtin_ldexpf16:
3141  case Builtin::BI__builtin_ldexpf128: {
3142  return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3143  *this, E, Intrinsic::ldexp,
3144  Intrinsic::experimental_constrained_ldexp));
3145  }
3146  default:
3147  break;
3148  }
3149  }
3150 
3151  // Check NonnullAttribute/NullabilityArg and Alignment.
3152  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3153  unsigned ParmNum) {
3154  Value *Val = A.emitRawPointer(*this);
3155  EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3156  ParmNum);
3157 
3158  if (SanOpts.has(SanitizerKind::Alignment)) {
3159  SanitizerSet SkippedChecks;
3160  SkippedChecks.set(SanitizerKind::All);
3161  SkippedChecks.clear(SanitizerKind::Alignment);
3162  SourceLocation Loc = Arg->getExprLoc();
3163  // Strip an implicit cast.
3164  if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3165  if (CE->getCastKind() == CK_BitCast)
3166  Arg = CE->getSubExpr();
3167  EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3168  SkippedChecks);
3169  }
3170  };
3171 
3172  switch (BuiltinIDIfNoAsmLabel) {
3173  default: break;
3174  case Builtin::BI__builtin___CFStringMakeConstantString:
3175  case Builtin::BI__builtin___NSStringMakeConstantString:
3176  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3177  case Builtin::BI__builtin_stdarg_start:
3178  case Builtin::BI__builtin_va_start:
3179  case Builtin::BI__va_start:
3180  case Builtin::BI__builtin_va_end:
3181  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3182  ? EmitScalarExpr(E->getArg(0))
3183  : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3184  BuiltinID != Builtin::BI__builtin_va_end);
3185  return RValue::get(nullptr);
3186  case Builtin::BI__builtin_va_copy: {
3187  Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3188  Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3189  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3190  {DstPtr, SrcPtr});
3191  return RValue::get(nullptr);
3192  }
3193  case Builtin::BIabs:
3194  case Builtin::BIlabs:
3195  case Builtin::BIllabs:
3196  case Builtin::BI__builtin_abs:
3197  case Builtin::BI__builtin_labs:
3198  case Builtin::BI__builtin_llabs: {
3199  bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3200 
3201  Value *Result;
3202  switch (getLangOpts().getSignedOverflowBehavior()) {
3203  case LangOptions::SOB_Defined:
3204  Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3205  break;
3206  case LangOptions::SOB_Undefined:
3207  if (!SanitizeOverflow) {
3208  Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3209  break;
3210  }
3211  [[fallthrough]];
3212  case LangOptions::SOB_Trapping:
3213  // TODO: Somehow handle the corner case when the address of abs is taken.
3214  Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3215  break;
3216  }
3217  return RValue::get(Result);
3218  }
3219  case Builtin::BI__builtin_complex: {
3220  Value *Real = EmitScalarExpr(E->getArg(0));
3221  Value *Imag = EmitScalarExpr(E->getArg(1));
3222  return RValue::getComplex({Real, Imag});
3223  }
3224  case Builtin::BI__builtin_conj:
3225  case Builtin::BI__builtin_conjf:
3226  case Builtin::BI__builtin_conjl:
3227  case Builtin::BIconj:
3228  case Builtin::BIconjf:
3229  case Builtin::BIconjl: {
3230  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3231  Value *Real = ComplexVal.first;
3232  Value *Imag = ComplexVal.second;
3233  Imag = Builder.CreateFNeg(Imag, "neg");
3234  return RValue::getComplex(std::make_pair(Real, Imag));
3235  }
3236  case Builtin::BI__builtin_creal:
3237  case Builtin::BI__builtin_crealf:
3238  case Builtin::BI__builtin_creall:
3239  case Builtin::BIcreal:
3240  case Builtin::BIcrealf:
3241  case Builtin::BIcreall: {
3242  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3243  return RValue::get(ComplexVal.first);
3244  }
3245 
3246  case Builtin::BI__builtin_preserve_access_index: {
3247  // Only enable the preserved access index region when debuginfo
3248  // is available, as debuginfo is needed to preserve the user-level
3249  // access pattern.
3250  if (!getDebugInfo()) {
3251  CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3252  return RValue::get(EmitScalarExpr(E->getArg(0)));
3253  }
3254 
3255  // Nested builtin_preserve_access_index() not supported
3256  if (IsInPreservedAIRegion) {
3257  CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3258  return RValue::get(EmitScalarExpr(E->getArg(0)));
3259  }
3260 
3261  IsInPreservedAIRegion = true;
3262  Value *Res = EmitScalarExpr(E->getArg(0));
3263  IsInPreservedAIRegion = false;
3264  return RValue::get(Res);
3265  }
3266 
3267  case Builtin::BI__builtin_cimag:
3268  case Builtin::BI__builtin_cimagf:
3269  case Builtin::BI__builtin_cimagl:
3270  case Builtin::BIcimag:
3271  case Builtin::BIcimagf:
3272  case Builtin::BIcimagl: {
3273  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3274  return RValue::get(ComplexVal.second);
3275  }
3276 
3277  case Builtin::BI__builtin_clrsb:
3278  case Builtin::BI__builtin_clrsbl:
3279  case Builtin::BI__builtin_clrsbll: {
3280  // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3281  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3282 
3283  llvm::Type *ArgType = ArgValue->getType();
3284  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3285 
3286  llvm::Type *ResultType = ConvertType(E->getType());
3287  Value *Zero = llvm::Constant::getNullValue(ArgType);
3288  Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3289  Value *Inverse = Builder.CreateNot(ArgValue, "not");
3290  Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3291  Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3292  Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3293  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3294  "cast");
3295  return RValue::get(Result);
3296  }
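// Example (illustrative): for a 32-bit int, clrsb(0x0000ffff) keeps the value
// as-is (it is non-negative), ctlz returns 16, and the result is
// 16 - 1 == 15 redundant sign bits.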
3297  case Builtin::BI__builtin_ctzs:
3298  case Builtin::BI__builtin_ctz:
3299  case Builtin::BI__builtin_ctzl:
3300  case Builtin::BI__builtin_ctzll:
3301  case Builtin::BI__builtin_ctzg: {
3302  bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3303  E->getNumArgs() > 1;
3304 
3305  Value *ArgValue =
3306  HasFallback ? EmitScalarExpr(E->getArg(0))
3307  : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3308 
3309  llvm::Type *ArgType = ArgValue->getType();
3310  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3311 
3312  llvm::Type *ResultType = ConvertType(E->getType());
3313  Value *ZeroUndef =
3314  Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3315  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3316  if (Result->getType() != ResultType)
3317  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3318  "cast");
3319  if (!HasFallback)
3320  return RValue::get(Result);
3321 
3322  Value *Zero = Constant::getNullValue(ArgType);
3323  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3324  Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3325  Value *ResultOrFallback =
3326  Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3327  return RValue::get(ResultOrFallback);
3328  }
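// Example (illustrative): '__builtin_ctzg(x, 32)' emits cttz with
// zero-is-poison allowed, followed by
//   %ctzg = select i1 %iszero, i32 32, i32 %cttz
// so a zero input yields the fallback value instead of poison.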
3329  case Builtin::BI__builtin_clzs:
3330  case Builtin::BI__builtin_clz:
3331  case Builtin::BI__builtin_clzl:
3332  case Builtin::BI__builtin_clzll:
3333  case Builtin::BI__builtin_clzg: {
3334  bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3335  E->getNumArgs() > 1;
3336 
3337  Value *ArgValue =
3338  HasFallback ? EmitScalarExpr(E->getArg(0))
3339  : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3340 
3341  llvm::Type *ArgType = ArgValue->getType();
3342  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3343 
3344  llvm::Type *ResultType = ConvertType(E->getType());
3345  Value *ZeroUndef =
3346  Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3347  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3348  if (Result->getType() != ResultType)
3349  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3350  "cast");
3351  if (!HasFallback)
3352  return RValue::get(Result);
3353 
3354  Value *Zero = Constant::getNullValue(ArgType);
3355  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3356  Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3357  Value *ResultOrFallback =
3358  Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3359  return RValue::get(ResultOrFallback);
3360  }
3361  case Builtin::BI__builtin_ffs:
3362  case Builtin::BI__builtin_ffsl:
3363  case Builtin::BI__builtin_ffsll: {
3364  // ffs(x) -> x ? cttz(x) + 1 : 0
3365  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3366 
3367  llvm::Type *ArgType = ArgValue->getType();
3368  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3369 
3370  llvm::Type *ResultType = ConvertType(E->getType());
3371  Value *Tmp =
3372  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3373  llvm::ConstantInt::get(ArgType, 1));
3374  Value *Zero = llvm::Constant::getNullValue(ArgType);
3375  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3376  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3377  if (Result->getType() != ResultType)
3378  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3379  "cast");
3380  return RValue::get(Result);
3381  }
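// Example (illustrative): ffs(0x10) == 5, since cttz(16) == 4 and the +1
// biases to a 1-based index; the final select maps ffs(0) to 0.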
3382  case Builtin::BI__builtin_parity:
3383  case Builtin::BI__builtin_parityl:
3384  case Builtin::BI__builtin_parityll: {
3385  // parity(x) -> ctpop(x) & 1
3386  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3387 
3388  llvm::Type *ArgType = ArgValue->getType();
3389  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3390 
3391  llvm::Type *ResultType = ConvertType(E->getType());
3392  Value *Tmp = Builder.CreateCall(F, ArgValue);
3393  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3394  if (Result->getType() != ResultType)
3395  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3396  "cast");
3397  return RValue::get(Result);
3398  }
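// Example (illustrative): parity(0b1011) == 1, since ctpop returns 3 and
// 3 & 1 == 1; an even popcount yields 0.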
3399  case Builtin::BI__lzcnt16:
3400  case Builtin::BI__lzcnt:
3401  case Builtin::BI__lzcnt64: {
3402  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3403 
3404  llvm::Type *ArgType = ArgValue->getType();
3405  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3406 
3407  llvm::Type *ResultType = ConvertType(E->getType());
3408  Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3409  if (Result->getType() != ResultType)
3410  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3411  "cast");
3412  return RValue::get(Result);
3413  }
3414  case Builtin::BI__popcnt16:
3415  case Builtin::BI__popcnt:
3416  case Builtin::BI__popcnt64:
3417  case Builtin::BI__builtin_popcount:
3418  case Builtin::BI__builtin_popcountl:
3419  case Builtin::BI__builtin_popcountll:
3420  case Builtin::BI__builtin_popcountg: {
3421  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3422 
3423  llvm::Type *ArgType = ArgValue->getType();
3424  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3425 
3426  llvm::Type *ResultType = ConvertType(E->getType());
3427  Value *Result = Builder.CreateCall(F, ArgValue);
3428  if (Result->getType() != ResultType)
3429  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3430  "cast");
3431  return RValue::get(Result);
3432  }
3433  case Builtin::BI__builtin_unpredictable: {
3434  // Always return the argument of __builtin_unpredictable. LLVM does not
3435  // handle this builtin. Metadata for this builtin should be added directly
3436  // to instructions such as branches or switches that use it.
3437  return RValue::get(EmitScalarExpr(E->getArg(0)));
3438  }
3439  case Builtin::BI__builtin_expect: {
3440  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3441  llvm::Type *ArgType = ArgValue->getType();
3442 
3443  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3444  // Don't generate llvm.expect on -O0 as the backend won't use it for
3445  // anything.
3446  // Note, we still IRGen ExpectedValue because it could have side-effects.
3447  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3448  return RValue::get(ArgValue);
3449 
3450  Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3451  Value *Result =
3452  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3453  return RValue::get(Result);
3454  }
3455  case Builtin::BI__builtin_expect_with_probability: {
3456  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3457  llvm::Type *ArgType = ArgValue->getType();
3458 
3459  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3460  llvm::APFloat Probability(0.0);
3461  const Expr *ProbArg = E->getArg(2);
3462  bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3463  assert(EvalSucceed && "probability should be able to evaluate as float");
3464  (void)EvalSucceed;
3465  bool LoseInfo = false;
3466  Probability.convert(llvm::APFloat::IEEEdouble(),
3467  llvm::RoundingMode::Dynamic, &LoseInfo);
3468  llvm::Type *Ty = ConvertType(ProbArg->getType());
3469  Constant *Confidence = ConstantFP::get(Ty, Probability);
3470  // Don't generate llvm.expect.with.probability on -O0 as the backend
3471  // won't use it for anything.
3472  // Note, we still IRGen ExpectedValue because it could have side-effects.
3473  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3474  return RValue::get(ArgValue);
3475 
3476  Function *FnExpect =
3477  CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3478  Value *Result = Builder.CreateCall(
3479  FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3480  return RValue::get(Result);
3481  }
3482  case Builtin::BI__builtin_assume_aligned: {
3483  const Expr *Ptr = E->getArg(0);
3484  Value *PtrValue = EmitScalarExpr(Ptr);
3485  Value *OffsetValue =
3486  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3487 
3488  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3489  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3490  if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3491  AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3492  llvm::Value::MaximumAlignment);
3493 
3494  emitAlignmentAssumption(PtrValue, Ptr,
3495  /*The expr loc is sufficient.*/ SourceLocation(),
3496  AlignmentCI, OffsetValue);
3497  return RValue::get(PtrValue);
3498  }
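// Example (illustrative): 'p = __builtin_assume_aligned(p, 64)' returns p
// unchanged but emits an alignment assumption ('llvm.assume' with an "align"
// operand bundle on p) that later optimizations may rely on.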
3499  case Builtin::BI__assume:
3500  case Builtin::BI__builtin_assume: {
3501  if (E->getArg(0)->HasSideEffects(getContext()))
3502  return RValue::get(nullptr);
3503 
3504  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3505  Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3506  Builder.CreateCall(FnAssume, ArgValue);
3507  return RValue::get(nullptr);
3508  }
3509  case Builtin::BI__builtin_assume_separate_storage: {
3510  const Expr *Arg0 = E->getArg(0);
3511  const Expr *Arg1 = E->getArg(1);
3512 
3513  Value *Value0 = EmitScalarExpr(Arg0);
3514  Value *Value1 = EmitScalarExpr(Arg1);
3515 
3516  Value *Values[] = {Value0, Value1};
3517  OperandBundleDefT<Value *> OBD("separate_storage", Values);
3518  Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3519  return RValue::get(nullptr);
3520  }
3521  case Builtin::BI__builtin_allow_runtime_check: {
3522  StringRef Kind =
3523  cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3524  LLVMContext &Ctx = CGM.getLLVMContext();
3525  llvm::Value *Allow = Builder.CreateCall(
3526  CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3527  llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3528  return RValue::get(Allow);
3529  }
3530  case Builtin::BI__arithmetic_fence: {
3531  // Create the builtin call if FastMath is selected and the target
3532  // supports the builtin; otherwise just return the argument.
3533  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3534  llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3535  bool isArithmeticFenceEnabled =
3536  FMF.allowReassoc() &&
3537  getContext().getTargetInfo().checkArithmeticFenceSupported();
3538  QualType ArgType = E->getArg(0)->getType();
3539  if (ArgType->isComplexType()) {
3540  if (isArithmeticFenceEnabled) {
3541  QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3542  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3543  Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3544  ConvertType(ElementType));
3545  Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3546  ConvertType(ElementType));
3547  return RValue::getComplex(std::make_pair(Real, Imag));
3548  }
3549  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3550  Value *Real = ComplexVal.first;
3551  Value *Imag = ComplexVal.second;
3552  return RValue::getComplex(std::make_pair(Real, Imag));
3553  }
3554  Value *ArgValue = EmitScalarExpr(E->getArg(0));
3555  if (isArithmeticFenceEnabled)
3556  return RValue::get(
3557  Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3558  return RValue::get(ArgValue);
3559  }
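// Example (illustrative): with reassociation enabled (e.g. '-ffast-math'),
// '__arithmetic_fence(a + b) + c' keeps 'a + b' from being reassociated with
// 'c'; when the fence is not enabled, the call folds away to its argument.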
3560  case Builtin::BI__builtin_bswap16:
3561  case Builtin::BI__builtin_bswap32:
3562  case Builtin::BI__builtin_bswap64:
3563  case Builtin::BI_byteswap_ushort:
3564  case Builtin::BI_byteswap_ulong:
3565  case Builtin::BI_byteswap_uint64: {
3566  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3567  }
3568  case Builtin::BI__builtin_bitreverse8:
3569  case Builtin::BI__builtin_bitreverse16:
3570  case Builtin::BI__builtin_bitreverse32:
3571  case Builtin::BI__builtin_bitreverse64: {
3572  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3573  }
3574  case Builtin::BI__builtin_rotateleft8:
3575  case Builtin::BI__builtin_rotateleft16:
3576  case Builtin::BI__builtin_rotateleft32:
3577  case Builtin::BI__builtin_rotateleft64:
3578  case Builtin::BI_rotl8: // Microsoft variants of rotate left
3579  case Builtin::BI_rotl16:
3580  case Builtin::BI_rotl:
3581  case Builtin::BI_lrotl:
3582  case Builtin::BI_rotl64:
3583  return emitRotate(E, false);
3584 
3585  case Builtin::BI__builtin_rotateright8:
3586  case Builtin::BI__builtin_rotateright16:
3587  case Builtin::BI__builtin_rotateright32:
3588  case Builtin::BI__builtin_rotateright64:
3589  case Builtin::BI_rotr8: // Microsoft variants of rotate right
3590  case Builtin::BI_rotr16:
3591  case Builtin::BI_rotr:
3592  case Builtin::BI_lrotr:
3593  case Builtin::BI_rotr64:
3594  return emitRotate(E, true);
3595 
3596  case Builtin::BI__builtin_constant_p: {
3597  llvm::Type *ResultType = ConvertType(E->getType());
3598 
3599  const Expr *Arg = E->getArg(0);
3600  QualType ArgType = Arg->getType();
3601  // FIXME: The allowance for Obj-C pointers and block pointers is historical
3602  // and likely a mistake.
3603  if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3604  !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3605  // Per the GCC documentation, only numeric constants are recognized after
3606  // inlining.
3607  return RValue::get(ConstantInt::get(ResultType, 0));
3608 
3609  if (Arg->HasSideEffects(getContext()))
3610  // The argument is unevaluated, so be conservative if it might have
3611  // side-effects.
3612  return RValue::get(ConstantInt::get(ResultType, 0));
3613 
3614  Value *ArgValue = EmitScalarExpr(Arg);
3615  if (ArgType->isObjCObjectPointerType()) {
3616  // Convert Objective-C objects to id because we cannot distinguish between
3617  // LLVM types for Obj-C classes as they are opaque.
3618  ArgType = CGM.getContext().getObjCIdType();
3619  ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3620  }
3621  Function *F =
3622  CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3623  Value *Result = Builder.CreateCall(F, ArgValue);
3624  if (Result->getType() != ResultType)
3625  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3626  return RValue::get(Result);
3627  }
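// Example (illustrative): '__builtin_constant_p(42)' becomes
// 'llvm.is.constant.i32(i32 42)', which later folds to 1; a value unknown at
// optimization time folds to 0.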
3628  case Builtin::BI__builtin_dynamic_object_size:
3629  case Builtin::BI__builtin_object_size: {
3630  unsigned Type =
3631  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3632  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3633 
3634  // We pass this builtin onto the optimizer so that it can figure out the
3635  // object size in more complex cases.
3636  bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3637  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3638  /*EmittedE=*/nullptr, IsDynamic));
3639  }
3640  case Builtin::BI__builtin_prefetch: {
3641  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3642  // FIXME: Technically these constants should be of type 'int', yes?
3643  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3644  llvm::ConstantInt::get(Int32Ty, 0);
3645  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3646  llvm::ConstantInt::get(Int32Ty, 3);
3647  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3648  Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3649  Builder.CreateCall(F, {Address, RW, Locality, Data});
3650  return RValue::get(nullptr);
3651  }
3652  case Builtin::BI__builtin_readcyclecounter: {
3653  Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3654  return RValue::get(Builder.CreateCall(F));
3655  }
3656  case Builtin::BI__builtin_readsteadycounter: {
3657  Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3658  return RValue::get(Builder.CreateCall(F));
3659  }
3660  case Builtin::BI__builtin___clear_cache: {
3661  Value *Begin = EmitScalarExpr(E->getArg(0));
3662  Value *End = EmitScalarExpr(E->getArg(1));
3663  Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3664  return RValue::get(Builder.CreateCall(F, {Begin, End}));
3665  }
3666  case Builtin::BI__builtin_trap:
3667  EmitTrapCall(Intrinsic::trap);
3668  return RValue::get(nullptr);
3669  case Builtin::BI__debugbreak:
3670  EmitTrapCall(Intrinsic::debugtrap);
3671  return RValue::get(nullptr);
3672  case Builtin::BI__builtin_unreachable: {
3673  EmitUnreachable(E->getExprLoc());
3674 
3675  // We do need to preserve an insertion point.
3676  EmitBlock(createBasicBlock("unreachable.cont"));
3677 
3678  return RValue::get(nullptr);
3679  }
3680 
3681  case Builtin::BI__builtin_powi:
3682  case Builtin::BI__builtin_powif:
3683  case Builtin::BI__builtin_powil: {
3684  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3685  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3686 
3687  if (Builder.getIsFPConstrained()) {
3688  // FIXME: llvm.powi has 2 mangling types,
3689  // llvm.experimental.constrained.powi has one.
3690  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3691  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3692  Src0->getType());
3693  return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3694  }
3695 
3696  Function *F = CGM.getIntrinsic(Intrinsic::powi,
3697  { Src0->getType(), Src1->getType() });
3698  return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3699  }
3700  case Builtin::BI__builtin_frexpl: {
3701  // Linux PPC will not be adding additional PPCDoubleDouble support.
3702  // WIP to switch default to IEEE long double. Will emit libcall for
3703  // frexpl instead of legalizing this type in the BE.
3704  if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3705  break;
3706  LLVM_FALLTHROUGH;
3707  }
3708  case Builtin::BI__builtin_frexp:
3709  case Builtin::BI__builtin_frexpf:
3710  case Builtin::BI__builtin_frexpf128:
3711  case Builtin::BI__builtin_frexpf16:
3712  return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3713  case Builtin::BI__builtin_isgreater:
3714  case Builtin::BI__builtin_isgreaterequal:
3715  case Builtin::BI__builtin_isless:
3716  case Builtin::BI__builtin_islessequal:
3717  case Builtin::BI__builtin_islessgreater:
3718  case Builtin::BI__builtin_isunordered: {
3719  // Ordered comparisons: we know the arguments to these are matching scalar
3720  // floating point values.
3721  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3722  Value *LHS = EmitScalarExpr(E->getArg(0));
3723  Value *RHS = EmitScalarExpr(E->getArg(1));
3724 
3725  switch (BuiltinID) {
3726  default: llvm_unreachable("Unknown ordered comparison");
3727  case Builtin::BI__builtin_isgreater:
3728  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3729  break;
3730  case Builtin::BI__builtin_isgreaterequal:
3731  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3732  break;
3733  case Builtin::BI__builtin_isless:
3734  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3735  break;
3736  case Builtin::BI__builtin_islessequal:
3737  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3738  break;
3739  case Builtin::BI__builtin_islessgreater:
3740  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3741  break;
3742  case Builtin::BI__builtin_isunordered:
3743  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3744  break;
3745  }
3746  // ZExt bool to int type.
3747  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3748  }
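// Example (illustrative): '__builtin_isunordered(x, y)' emits 'fcmp uno' and
// is true iff either operand is NaN, while the ordered variants above are
// false whenever a NaN is involved.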
3749 
3750  case Builtin::BI__builtin_isnan: {
3751  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3752  Value *V = EmitScalarExpr(E->getArg(0));
3753  if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3754  return RValue::get(Result);
3755  return RValue::get(
3756  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3757  ConvertType(E->getType())));
3758  }
3759 
3760  case Builtin::BI__builtin_issignaling: {
3761  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3762  Value *V = EmitScalarExpr(E->getArg(0));
3763  return RValue::get(
3764  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3765  ConvertType(E->getType())));
3766  }
3767 
3768  case Builtin::BI__builtin_isinf: {
3769  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3770  Value *V = EmitScalarExpr(E->getArg(0));
3771  if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3772  return RValue::get(Result);
3773  return RValue::get(
3774  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3775  ConvertType(E->getType())));
3776  }
3777 
3778  case Builtin::BIfinite:
3779  case Builtin::BI__finite:
3780  case Builtin::BIfinitef:
3781  case Builtin::BI__finitef:
3782  case Builtin::BIfinitel:
3783  case Builtin::BI__finitel:
3784  case Builtin::BI__builtin_isfinite: {
3785  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3786  Value *V = EmitScalarExpr(E->getArg(0));
3787  if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3788  return RValue::get(Result);
3789  return RValue::get(
3790  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3791  ConvertType(E->getType())));
3792  }
3793 
3794  case Builtin::BI__builtin_isnormal: {
3795  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3796  Value *V = EmitScalarExpr(E->getArg(0));
3797  return RValue::get(
3798  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3799  ConvertType(E->getType())));
3800  }
3801 
3802  case Builtin::BI__builtin_issubnormal: {
3803  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3804  Value *V = EmitScalarExpr(E->getArg(0));
3805  return RValue::get(
3806  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3807  ConvertType(E->getType())));
3808  }
3809 
3810  case Builtin::BI__builtin_iszero: {
3811  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3812  Value *V = EmitScalarExpr(E->getArg(0));
3813  return RValue::get(
3814  Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3815  ConvertType(E->getType())));
3816  }
3817 
3818  case Builtin::BI__builtin_isfpclass: {
3819  Expr::EvalResult Result;
3820  if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3821  break;
3822  uint64_t Test = Result.Val.getInt().getLimitedValue();
3823  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3824  Value *V = EmitScalarExpr(E->getArg(0));
3825  return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3826  ConvertType(E->getType())));
3827  }
3828 
3829  case Builtin::BI__builtin_nondeterministic_value: {
3830  llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3831 
3832  Value *Result = PoisonValue::get(Ty);
3833  Result = Builder.CreateFreeze(Result);
3834 
3835  return RValue::get(Result);
3836  }
3837 
3838  case Builtin::BI__builtin_elementwise_abs: {
3839  Value *Result;
3840  QualType QT = E->getArg(0)->getType();
3841 
3842  if (auto *VecTy = QT->getAs<VectorType>())
3843  QT = VecTy->getElementType();
3844  if (QT->isIntegerType())
3845  Result = Builder.CreateBinaryIntrinsic(
3846  llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3847  Builder.getFalse(), nullptr, "elt.abs");
3848  else
3849  Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3850 
3851  return RValue::get(Result);
3852  }
3853 
3854  case Builtin::BI__builtin_elementwise_ceil:
3855  return RValue::get(
3856  emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3857  case Builtin::BI__builtin_elementwise_exp:
3858  return RValue::get(
3859  emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3860  case Builtin::BI__builtin_elementwise_exp2:
3861  return RValue::get(
3862  emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3863  case Builtin::BI__builtin_elementwise_log:
3864  return RValue::get(
3865  emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3866  case Builtin::BI__builtin_elementwise_log2:
3867  return RValue::get(
3868  emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3869  case Builtin::BI__builtin_elementwise_log10:
3870  return RValue::get(
3871  emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3872  case Builtin::BI__builtin_elementwise_pow: {
3873  return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3874  }
3875  case Builtin::BI__builtin_elementwise_bitreverse:
3876  return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3877  "elt.bitreverse"));
3878  case Builtin::BI__builtin_elementwise_cos:
3879  return RValue::get(
3880  emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3881  case Builtin::BI__builtin_elementwise_floor:
3882  return RValue::get(
3883  emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3884  case Builtin::BI__builtin_elementwise_roundeven:
3885  return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3886  "elt.roundeven"));
3887  case Builtin::BI__builtin_elementwise_round:
3889  "elt.round"));
3890  case Builtin::BI__builtin_elementwise_rint:
3892  "elt.rint"));
3893  case Builtin::BI__builtin_elementwise_nearbyint:
3895  "elt.nearbyint"));
3896  case Builtin::BI__builtin_elementwise_sin:
3897  return RValue::get(
3898  emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3899 
3900  case Builtin::BI__builtin_elementwise_trunc:
3901  return RValue::get(
3902  emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3903  case Builtin::BI__builtin_elementwise_canonicalize:
3904  return RValue::get(
3905  emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3906  case Builtin::BI__builtin_elementwise_copysign:
3907  return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3908  case Builtin::BI__builtin_elementwise_fma:
3909  return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3910  case Builtin::BI__builtin_elementwise_add_sat:
3911  case Builtin::BI__builtin_elementwise_sub_sat: {
3912  Value *Op0 = EmitScalarExpr(E->getArg(0));
3913  Value *Op1 = EmitScalarExpr(E->getArg(1));
3914  Value *Result;
3915  assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3916  QualType Ty = E->getArg(0)->getType();
3917  if (auto *VecTy = Ty->getAs<VectorType>())
3918  Ty = VecTy->getElementType();
3919  bool IsSigned = Ty->isSignedIntegerType();
3920  unsigned Opc;
3921  if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3922  Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3923  else
3924  Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3925  Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3926  return RValue::get(Result);
3927  }
3928 
3929  case Builtin::BI__builtin_elementwise_max: {
3930  Value *Op0 = EmitScalarExpr(E->getArg(0));
3931  Value *Op1 = EmitScalarExpr(E->getArg(1));
3932  Value *Result;
3933  if (Op0->getType()->isIntOrIntVectorTy()) {
3934  QualType Ty = E->getArg(0)->getType();
3935  if (auto *VecTy = Ty->getAs<VectorType>())
3936  Ty = VecTy->getElementType();
3937  Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3938  ? llvm::Intrinsic::smax
3939  : llvm::Intrinsic::umax,
3940  Op0, Op1, nullptr, "elt.max");
3941  } else
3942  Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3943  return RValue::get(Result);
3944  }
3945  case Builtin::BI__builtin_elementwise_min: {
3946  Value *Op0 = EmitScalarExpr(E->getArg(0));
3947  Value *Op1 = EmitScalarExpr(E->getArg(1));
3948  Value *Result;
3949  if (Op0->getType()->isIntOrIntVectorTy()) {
3950  QualType Ty = E->getArg(0)->getType();
3951  if (auto *VecTy = Ty->getAs<VectorType>())
3952  Ty = VecTy->getElementType();
3953  Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3954  ? llvm::Intrinsic::smin
3955  : llvm::Intrinsic::umin,
3956  Op0, Op1, nullptr, "elt.min");
3957  } else
3958  Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3959  return RValue::get(Result);
3960  }
3961 
3962  case Builtin::BI__builtin_reduce_max: {
3963  auto GetIntrinsicID = [](QualType QT) {
3964  if (auto *VecTy = QT->getAs<VectorType>())
3965  QT = VecTy->getElementType();
3966  if (QT->isSignedIntegerType())
3967  return llvm::Intrinsic::vector_reduce_smax;
3968  if (QT->isUnsignedIntegerType())
3969  return llvm::Intrinsic::vector_reduce_umax;
3970  assert(QT->isFloatingType() && "must have a float here");
3971  return llvm::Intrinsic::vector_reduce_fmax;
3972  };
3973  return RValue::get(emitUnaryBuiltin(
3974  *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3975  }
3976 
3977  case Builtin::BI__builtin_reduce_min: {
3978  auto GetIntrinsicID = [](QualType QT) {
3979  if (auto *VecTy = QT->getAs<VectorType>())
3980  QT = VecTy->getElementType();
3981  if (QT->isSignedIntegerType())
3982  return llvm::Intrinsic::vector_reduce_smin;
3983  if (QT->isUnsignedIntegerType())
3984  return llvm::Intrinsic::vector_reduce_umin;
3985  assert(QT->isFloatingType() && "must have a float here");
3986  return llvm::Intrinsic::vector_reduce_fmin;
3987  };
3988 
3989  return RValue::get(emitUnaryBuiltin(
3990  *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3991  }
3992 
3993  case Builtin::BI__builtin_reduce_add:
3994  return RValue::get(emitUnaryBuiltin(
3995  *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3996  case Builtin::BI__builtin_reduce_mul:
3997  return RValue::get(emitUnaryBuiltin(
3998  *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3999  case Builtin::BI__builtin_reduce_xor:
4000  return RValue::get(emitUnaryBuiltin(
4001  *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4002  case Builtin::BI__builtin_reduce_or:
4003  return RValue::get(emitUnaryBuiltin(
4004  *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4005  case Builtin::BI__builtin_reduce_and:
4006  return RValue::get(emitUnaryBuiltin(
4007  *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4008 
4009  case Builtin::BI__builtin_matrix_transpose: {
4010  auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4011  Value *MatValue = EmitScalarExpr(E->getArg(0));
4012  MatrixBuilder MB(Builder);
4013  Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4014  MatrixTy->getNumColumns());
4015  return RValue::get(Result);
4016  }
4017 
4018  case Builtin::BI__builtin_matrix_column_major_load: {
4019  MatrixBuilder MB(Builder);
4020  // Emit everything that isn't dependent on the first parameter type
4021  Value *Stride = EmitScalarExpr(E->getArg(3));
4022  const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4023  auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4024  assert(PtrTy && "arg0 must be of pointer type");
4025  bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4026 
4026 
4027  Address Src = EmitPointerWithAlignment(E->getArg(0));
4028  EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4029  E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4030  0);
4031  Value *Result = MB.CreateColumnMajorLoad(
4032  Src.getElementType(), Src.emitRawPointer(*this),
4033  Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4034  ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4035  return RValue::get(Result);
4036  }
4037 
4038  case Builtin::BI__builtin_matrix_column_major_store: {
4039  MatrixBuilder MB(Builder);
4040  Value *Matrix = EmitScalarExpr(E->getArg(0));
4041  Address Dst = EmitPointerWithAlignment(E->getArg(1));
4042  Value *Stride = EmitScalarExpr(E->getArg(2));
4043 
4044  const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4045  auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4046  assert(PtrTy && "arg1 must be of pointer type");
4047  bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4048 
4049  EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4050  E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4051  0);
4052  Value *Result = MB.CreateColumnMajorStore(
4053  Matrix, Dst.emitRawPointer(*this),
4054  Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4055  MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4056  return RValue::get(Result);
4057  }
4058 
4059  case Builtin::BI__builtin_isinf_sign: {
4060  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4061  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4062  // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4063  Value *Arg = EmitScalarExpr(E->getArg(0));
4064  Value *AbsArg = EmitFAbs(*this, Arg);
4065  Value *IsInf = Builder.CreateFCmpOEQ(
4066  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4067  Value *IsNeg = EmitSignBit(*this, Arg);
4068 
4069  llvm::Type *IntTy = ConvertType(E->getType());
4070  Value *Zero = Constant::getNullValue(IntTy);
4071  Value *One = ConstantInt::get(IntTy, 1);
4072  Value *NegativeOne = ConstantInt::get(IntTy, -1);
4073  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4074  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4075  return RValue::get(Result);
4076  }
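// Example (illustrative): isinf_sign(-INFINITY) == -1, since fabs(x) compares
// equal to +inf and the sign bit selects -1; finite and NaN inputs take the
// zero result.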
4077 
4078  case Builtin::BI__builtin_flt_rounds: {
4079  Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4080 
4081  llvm::Type *ResultType = ConvertType(E->getType());
4082  Value *Result = Builder.CreateCall(F);
4083  if (Result->getType() != ResultType)
4084  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4085  "cast");
4086  return RValue::get(Result);
4087  }
4088 
4089  case Builtin::BI__builtin_set_flt_rounds: {
4090  Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4091 
4092  Value *V = EmitScalarExpr(E->getArg(0));
4093  Builder.CreateCall(F, V);
4094  return RValue::get(nullptr);
4095  }
4096 
4097  case Builtin::BI__builtin_fpclassify: {
4098  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4099  // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4100  Value *V = EmitScalarExpr(E->getArg(5));
4101  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4102 
4103  // Create Result
4104  BasicBlock *Begin = Builder.GetInsertBlock();
4105  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4106  Builder.SetInsertPoint(End);
4107  PHINode *Result =
4108  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4109  "fpclassify_result");
4110 
4111  // if (V==0) return FP_ZERO
4112  Builder.SetInsertPoint(Begin);
4113  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4114  "iszero");
4115  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4116  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4117  Builder.CreateCondBr(IsZero, End, NotZero);
4118  Result->addIncoming(ZeroLiteral, Begin);
4119 
4120  // if (V != V) return FP_NAN
4121  Builder.SetInsertPoint(NotZero);
4122  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4123  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4124  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4125  Builder.CreateCondBr(IsNan, End, NotNan);
4126  Result->addIncoming(NanLiteral, NotZero);
4127 
4128  // if (fabs(V) == infinity) return FP_INFINITY
4129  Builder.SetInsertPoint(NotNan);
4130  Value *VAbs = EmitFAbs(*this, V);
4131  Value *IsInf =
4132  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4133  "isinf");
4134  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4135  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4136  Builder.CreateCondBr(IsInf, End, NotInf);
4137  Result->addIncoming(InfLiteral, NotNan);
4138 
4139  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4140  Builder.SetInsertPoint(NotInf);
4141  APFloat Smallest = APFloat::getSmallestNormalized(
4142  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4143  Value *IsNormal =
4144  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4145  "isnormal");
4146  Value *NormalResult =
4147  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4148  EmitScalarExpr(E->getArg(3)));
4149  Builder.CreateBr(End);
4150  Result->addIncoming(NormalResult, NotInf);
4151 
4152  // return Result
4153  Builder.SetInsertPoint(End);
4154  return RValue::get(Result);
4155  }
4156 
4157  // An alloca will always return a pointer to the alloca (stack) address
4158  // space. This address space need not be the same as the AST / Language
4159  // default (e.g. in C / C++ auto vars are in the generic address space). At
4160  // the AST level this is handled within CreateTempAlloca et al., but for the
4161  // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4162  // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4163  case Builtin::BIalloca:
4164  case Builtin::BI_alloca:
4165  case Builtin::BI__builtin_alloca_uninitialized:
4166  case Builtin::BI__builtin_alloca: {
4167  Value *Size = EmitScalarExpr(E->getArg(0));
4168  const TargetInfo &TI = getContext().getTargetInfo();
4169  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4170  const Align SuitableAlignmentInBytes =
4171  CGM.getContext()
4172  .toCharUnitsFromBits(TI.getSuitableAlign())
4173  .getAsAlign();
4174  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4175  AI->setAlignment(SuitableAlignmentInBytes);
4176  if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4177  initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4178  LangAS AAS = getASTAllocaAddressSpace();
4179  LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4180  if (AAS != EAS) {
4181  llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4182  return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4183  EAS, Ty));
4184  }
4185  return RValue::get(AI);
4186  }
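// Example (illustrative): on AMDGPU the alloca is created in the private
// address space (addrspace(5)) while 'void *' is generic, so the result is
// addrspacecast to the expected pointer type before being returned.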
4187 
4188  case Builtin::BI__builtin_alloca_with_align_uninitialized:
4189  case Builtin::BI__builtin_alloca_with_align: {
4190  Value *Size = EmitScalarExpr(E->getArg(0));
4191  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4192  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4193  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4194  const Align AlignmentInBytes =
4195  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4196  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4197  AI->setAlignment(AlignmentInBytes);
4198  if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4199  initializeAlloca(*this, AI, Size, AlignmentInBytes);
4200  LangAS AAS = getASTAllocaAddressSpace();
4201  LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4202  if (AAS != EAS) {
4203  llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4204  return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4205  EAS, Ty));
4206  }
4207  return RValue::get(AI);
4208  }
4209 
4210  case Builtin::BIbzero:
4211  case Builtin::BI__builtin_bzero: {
4212  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4213  Value *SizeVal = EmitScalarExpr(E->getArg(1));
4214  EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4215  E->getArg(0)->getExprLoc(), FD, 0);
4216  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4217  return RValue::get(nullptr);
4218  }
4219 
4220  case Builtin::BIbcopy:
4221  case Builtin::BI__builtin_bcopy: {
4222  Address Src = EmitPointerWithAlignment(E->getArg(0));
4223  Address Dest = EmitPointerWithAlignment(E->getArg(1));
4224  Value *SizeVal = EmitScalarExpr(E->getArg(2));
4225  EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4226  E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4227  0);
4228  EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4229  E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4230  0);
4231  Builder.CreateMemMove(Dest, Src, SizeVal, false);
4232  return RValue::get(nullptr);
4233  }
4234 
4235  case Builtin::BImemcpy:
4236  case Builtin::BI__builtin_memcpy:
4237  case Builtin::BImempcpy:
4238  case Builtin::BI__builtin_mempcpy: {
4239  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4240  Address Src = EmitPointerWithAlignment(E->getArg(1));
4241  Value *SizeVal = EmitScalarExpr(E->getArg(2));
4242  EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4243  EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4244  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4245  if (BuiltinID == Builtin::BImempcpy ||
4246  BuiltinID == Builtin::BI__builtin_mempcpy)
4247  return RValue::get(Builder.CreateInBoundsGEP(
4248  Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4249  else
4250  return RValue::get(Dest, *this);
4251  }
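// Example (illustrative): mempcpy(d, s, n) copies exactly like memcpy, but
// the inbounds GEP above makes it return 'd + n' rather than 'd'.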
4252 
4253  case Builtin::BI__builtin_memcpy_inline: {
4254  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4255  Address Src = EmitPointerWithAlignment(E->getArg(1));
4256  uint64_t Size =
4257  E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4258  EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4259  EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4260  Builder.CreateMemCpyInline(Dest, Src, Size);
4261  return RValue::get(nullptr);
4262  }
4263 
4264  case Builtin::BI__builtin_char_memchr:
4265  BuiltinID = Builtin::BI__builtin_memchr;
4266  break;
4267 
4268  case Builtin::BI__builtin___memcpy_chk: {
4269  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4270  Expr::EvalResult SizeResult, DstSizeResult;
4271  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4272  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4273  break;
4274  llvm::APSInt Size = SizeResult.Val.getInt();
4275  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4276  if (Size.ugt(DstSize))
4277  break;
4278  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4279  Address Src = EmitPointerWithAlignment(E->getArg(1));
4280  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4281  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4282  return RValue::get(Dest, *this);
4283  }
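// Example (illustrative): '__builtin___memcpy_chk(d, s, 4, 16)' satisfies
// cst1 <= cst2 and is folded to a plain 4-byte memcpy; non-constant sizes
// break out of the switch and emit the '__memcpy_chk' library call instead.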
4284 
4285  case Builtin::BI__builtin_objc_memmove_collectable: {
4286  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4287  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4288  Value *SizeVal = EmitScalarExpr(E->getArg(2));
4289  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4290  DestAddr, SrcAddr, SizeVal);
4291  return RValue::get(DestAddr, *this);
4292  }
4293 
4294  case Builtin::BI__builtin___memmove_chk: {
4295  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4296  Expr::EvalResult SizeResult, DstSizeResult;
4297  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4298  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4299  break;
4300  llvm::APSInt Size = SizeResult.Val.getInt();
4301  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4302  if (Size.ugt(DstSize))
4303  break;
4304  Address Dest = EmitPointerWithAlignment(E->getArg(0));
 4305  Address Src = EmitPointerWithAlignment(E->getArg(1));
 4306  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4307  Builder.CreateMemMove(Dest, Src, SizeVal, false);
4308  return RValue::get(Dest, *this);
4309  }
4310 
4311  case Builtin::BImemmove:
4312  case Builtin::BI__builtin_memmove: {
4313  Address Dest = EmitPointerWithAlignment(E->getArg(0));
 4314  Address Src = EmitPointerWithAlignment(E->getArg(1));
 4315  Value *SizeVal = EmitScalarExpr(E->getArg(2));
4316  EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4317  EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4318  Builder.CreateMemMove(Dest, Src, SizeVal, false);
4319  return RValue::get(Dest, *this);
4320  }
4321  case Builtin::BImemset:
4322  case Builtin::BI__builtin_memset: {
4323  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4324  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4325  Builder.getInt8Ty());
4326  Value *SizeVal = EmitScalarExpr(E->getArg(2));
4327  EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4328  E->getArg(0)->getExprLoc(), FD, 0);
4329  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4330  return RValue::get(Dest, *this);
4331  }
4332  case Builtin::BI__builtin_memset_inline: {
4333  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4334  Value *ByteVal =
4335  Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4336  uint64_t Size =
4337  E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4338  EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4339  E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4340  0);
4341  Builder.CreateMemSetInline(Dest, ByteVal, Size);
4342  return RValue::get(nullptr);
4343  }
4344  case Builtin::BI__builtin___memset_chk: {
4345  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4346  Expr::EvalResult SizeResult, DstSizeResult;
4347  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4348  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4349  break;
4350  llvm::APSInt Size = SizeResult.Val.getInt();
4351  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4352  if (Size.ugt(DstSize))
4353  break;
4354  Address Dest = EmitPointerWithAlignment(E->getArg(0));
4355  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4356  Builder.getInt8Ty());
4357  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4358  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4359  return RValue::get(Dest, *this);
4360  }
4361  case Builtin::BI__builtin_wmemchr: {
4362  // The MSVC runtime library does not provide a definition of wmemchr, so we
4363  // need an inline implementation.
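      // Roughly equivalent C sketch of the loop emitted below:
      //   while (size--) {
      //     if (*str == chr) return (wchar_t *)str;
      //     ++str;
      //   }
      //   return 0;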
4364  if (!getTarget().getTriple().isOSMSVCRT())
4365  break;
4366 
4367  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4368  Value *Str = EmitScalarExpr(E->getArg(0));
4369  Value *Chr = EmitScalarExpr(E->getArg(1));
4370  Value *Size = EmitScalarExpr(E->getArg(2));
4371 
4372  BasicBlock *Entry = Builder.GetInsertBlock();
4373  BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4374  BasicBlock *Next = createBasicBlock("wmemchr.next");
4375  BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4376  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4377  Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4378 
4379  EmitBlock(CmpEq);
4380  PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4381  StrPhi->addIncoming(Str, Entry);
4382  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4383  SizePhi->addIncoming(Size, Entry);
4384  CharUnits WCharAlign =
4385  getContext().getTypeAlignInChars(getContext().WCharTy);
4386  Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4387  Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4388  Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4389  Builder.CreateCondBr(StrEqChr, Exit, Next);
4390 
4391  EmitBlock(Next);
4392  Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4393  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4394  Value *NextSizeEq0 =
4395  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4396  Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4397  StrPhi->addIncoming(NextStr, Next);
4398  SizePhi->addIncoming(NextSize, Next);
4399 
4400  EmitBlock(Exit);
4401  PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4402  Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4403  Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4404  Ret->addIncoming(FoundChr, CmpEq);
4405  return RValue::get(Ret);
4406  }
4407  case Builtin::BI__builtin_wmemcmp: {
4408  // The MSVC runtime library does not provide a definition of wmemcmp, so we
4409  // need an inline implementation.
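      // Roughly equivalent C sketch; note the elements are compared as
      // unsigned values, matching the unsigned icmps below:
      //   for (; size; --size, ++dst, ++src) {
      //     if (*dst > *src) return 1;
      //     if (*dst < *src) return -1;
      //   }
      //   return 0;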
4410  if (!getTarget().getTriple().isOSMSVCRT())
4411  break;
4412 
4413  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4414 
4415  Value *Dst = EmitScalarExpr(E->getArg(0));
4416  Value *Src = EmitScalarExpr(E->getArg(1));
4417  Value *Size = EmitScalarExpr(E->getArg(2));
4418 
4419  BasicBlock *Entry = Builder.GetInsertBlock();
4420  BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4421  BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4422  BasicBlock *Next = createBasicBlock("wmemcmp.next");
4423  BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4424  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4425  Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4426 
4427  EmitBlock(CmpGT);
4428  PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4429  DstPhi->addIncoming(Dst, Entry);
4430  PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4431  SrcPhi->addIncoming(Src, Entry);
4432  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4433  SizePhi->addIncoming(Size, Entry);
4434  CharUnits WCharAlign =
4435  getContext().getTypeAlignInChars(getContext().WCharTy);
4436  Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4437  Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4438  Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4439  Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4440 
4441  EmitBlock(CmpLT);
4442  Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4443  Builder.CreateCondBr(DstLtSrc, Exit, Next);
4444 
4445  EmitBlock(Next);
4446  Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4447  Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4448  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4449  Value *NextSizeEq0 =
4450  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4451  Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4452  DstPhi->addIncoming(NextDst, Next);
4453  SrcPhi->addIncoming(NextSrc, Next);
4454  SizePhi->addIncoming(NextSize, Next);
4455 
4456  EmitBlock(Exit);
4457  PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4458  Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4459  Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4460  Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4461  Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4462  return RValue::get(Ret);
4463  }
4464  case Builtin::BI__builtin_dwarf_cfa: {
4465  // The offset in bytes from the first argument to the CFA.
4466  //
4467  // Why on earth is this in the frontend? Is there any reason at
4468  // all that the backend can't reasonably determine this while
4469  // lowering llvm.eh.dwarf.cfa()?
4470  //
4471  // TODO: If there's a satisfactory reason, add a target hook for
4472  // this instead of hard-coding 0, which is correct for most targets.
4473  int32_t Offset = 0;
4474 
4475  Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4476  return RValue::get(Builder.CreateCall(F,
4477  llvm::ConstantInt::get(Int32Ty, Offset)));
4478  }
4479  case Builtin::BI__builtin_return_address: {
4480  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4481  getContext().UnsignedIntTy);
4482  Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4483  return RValue::get(Builder.CreateCall(F, Depth));
4484  }
4485  case Builtin::BI_ReturnAddress: {
4486  Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4487  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4488  }
4489  case Builtin::BI__builtin_frame_address: {
4490  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4491  getContext().UnsignedIntTy);
4492  Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4493  return RValue::get(Builder.CreateCall(F, Depth));
4494  }
4495  case Builtin::BI__builtin_extract_return_addr: {
4496  Value *Address = EmitScalarExpr(E->getArg(0));
4497  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4498  return RValue::get(Result);
4499  }
4500  case Builtin::BI__builtin_frob_return_addr: {
4501  Value *Address = EmitScalarExpr(E->getArg(0));
4502  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4503  return RValue::get(Result);
4504  }
4505  case Builtin::BI__builtin_dwarf_sp_column: {
4506  llvm::IntegerType *Ty
4507  = cast<llvm::IntegerType>(ConvertType(E->getType()));
4508  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4509  if (Column == -1) {
4510  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4511  return RValue::get(llvm::UndefValue::get(Ty));
4512  }
4513  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4514  }
4515  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4516  Value *Address = EmitScalarExpr(E->getArg(0));
4517  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4518  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4519  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4520  }
4521  case Builtin::BI__builtin_eh_return: {
4522  Value *Int = EmitScalarExpr(E->getArg(0));
4523  Value *Ptr = EmitScalarExpr(E->getArg(1));
4524 
4525  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4526  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4527  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4528  Function *F =
4529  CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4530  : Intrinsic::eh_return_i64);
4531  Builder.CreateCall(F, {Int, Ptr});
4532  Builder.CreateUnreachable();
4533 
4534  // We do need to preserve an insertion point.
4535  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4536 
4537  return RValue::get(nullptr);
4538  }
4539  case Builtin::BI__builtin_unwind_init: {
4540  Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4541  Builder.CreateCall(F);
4542  return RValue::get(nullptr);
4543  }
4544  case Builtin::BI__builtin_extend_pointer: {
4545  // Extends a pointer to the size of an _Unwind_Word, which is
4546  // uint64_t on all platforms. Generally this gets poked into a
4547  // register and eventually used as an address, so if the
4548  // addressing registers are wider than pointers and the platform
4549  // doesn't implicitly ignore high-order bits when doing
4550  // addressing, we need to make sure we zext / sext based on
4551  // the platform's expectations.
4552  //
4553  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
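      //
      // Illustrative lowering on a hypothetical 32-bit target whose target
      // hooks request sign extension:
      //   %i = ptrtoint ptr %p to i32
      //   %e = sext i32 %i to i64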
4554 
4555  // Cast the pointer to intptr_t.
4556  Value *Ptr = EmitScalarExpr(E->getArg(0));
4557  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4558 
4559  // If that's 64 bits, we're done.
4560  if (IntPtrTy->getBitWidth() == 64)
4561  return RValue::get(Result);
4562 
 4563  // Otherwise, ask the target hooks what to do.
4564  if (getTargetHooks().extendPointerWithSExt())
4565  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4566  else
4567  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4568  }
4569  case Builtin::BI__builtin_setjmp: {
4570  // Buffer is a void**.
 4571  Address Buf = EmitPointerWithAlignment(E->getArg(0));
 4572 
4573  // Store the frame pointer to the setjmp buffer.
4574  Value *FrameAddr = Builder.CreateCall(
4575  CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4576  ConstantInt::get(Int32Ty, 0));
4577  Builder.CreateStore(FrameAddr, Buf);
4578 
4579  // Store the stack pointer to the setjmp buffer.
4580  Value *StackAddr = Builder.CreateStackSave();
4581  assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4582 
4583  Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4584  Builder.CreateStore(StackAddr, StackSaveSlot);
4585 
4586  // Call LLVM's EH setjmp, which is lightweight.
4587  Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4588  return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4589  }
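      // Sketch of the buffer layout used above (LLVM's SJLJ convention):
      // slot 0 holds the frame pointer, slot 2 holds the saved stack pointer,
      // and llvm.eh.sjlj.setjmp fills in the remaining slots itself.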
4590  case Builtin::BI__builtin_longjmp: {
4591  Value *Buf = EmitScalarExpr(E->getArg(0));
4592 
4593  // Call LLVM's EH longjmp, which is lightweight.
4594  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4595 
4596  // longjmp doesn't return; mark this as unreachable.
4597  Builder.CreateUnreachable();
4598 
4599  // We do need to preserve an insertion point.
4600  EmitBlock(createBasicBlock("longjmp.cont"));
4601 
4602  return RValue::get(nullptr);
4603  }
4604  case Builtin::BI__builtin_launder: {
4605  const Expr *Arg = E->getArg(0);
4606  QualType ArgTy = Arg->getType()->getPointeeType();
4607  Value *Ptr = EmitScalarExpr(Arg);
4608  if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4609  Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4610 
4611  return RValue::get(Ptr);
4612  }
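      // Note: the launder is only materialized when the pointee type requires
      // it (e.g. a dynamic class under -fstrict-vtable-pointers); otherwise
      // __builtin_launder returns its argument unchanged.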
4613  case Builtin::BI__sync_fetch_and_add:
4614  case Builtin::BI__sync_fetch_and_sub:
4615  case Builtin::BI__sync_fetch_and_or:
4616  case Builtin::BI__sync_fetch_and_and:
4617  case Builtin::BI__sync_fetch_and_xor:
4618  case Builtin::BI__sync_fetch_and_nand:
4619  case Builtin::BI__sync_add_and_fetch:
4620  case Builtin::BI__sync_sub_and_fetch:
4621  case Builtin::BI__sync_and_and_fetch:
4622  case Builtin::BI__sync_or_and_fetch:
4623  case Builtin::BI__sync_xor_and_fetch:
4624  case Builtin::BI__sync_nand_and_fetch:
4625  case Builtin::BI__sync_val_compare_and_swap:
4626  case Builtin::BI__sync_bool_compare_and_swap:
4627  case Builtin::BI__sync_lock_test_and_set:
4628  case Builtin::BI__sync_lock_release:
4629  case Builtin::BI__sync_swap:
4630  llvm_unreachable("Shouldn't make it through sema");
4631  case Builtin::BI__sync_fetch_and_add_1:
4632  case Builtin::BI__sync_fetch_and_add_2:
4633  case Builtin::BI__sync_fetch_and_add_4:
4634  case Builtin::BI__sync_fetch_and_add_8:
4635  case Builtin::BI__sync_fetch_and_add_16:
4636  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4637  case Builtin::BI__sync_fetch_and_sub_1:
4638  case Builtin::BI__sync_fetch_and_sub_2:
4639  case Builtin::BI__sync_fetch_and_sub_4:
4640  case Builtin::BI__sync_fetch_and_sub_8:
4641  case Builtin::BI__sync_fetch_and_sub_16:
4642  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4643  case Builtin::BI__sync_fetch_and_or_1:
4644  case Builtin::BI__sync_fetch_and_or_2:
4645  case Builtin::BI__sync_fetch_and_or_4:
4646  case Builtin::BI__sync_fetch_and_or_8:
4647  case Builtin::BI__sync_fetch_and_or_16:
4648  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4649  case Builtin::BI__sync_fetch_and_and_1:
4650  case Builtin::BI__sync_fetch_and_and_2:
4651  case Builtin::BI__sync_fetch_and_and_4:
4652  case Builtin::BI__sync_fetch_and_and_8:
4653  case Builtin::BI__sync_fetch_and_and_16:
4654  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4655  case Builtin::BI__sync_fetch_and_xor_1:
4656  case Builtin::BI__sync_fetch_and_xor_2:
4657  case Builtin::BI__sync_fetch_and_xor_4:
4658  case Builtin::BI__sync_fetch_and_xor_8:
4659  case Builtin::BI__sync_fetch_and_xor_16:
4660  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4661  case Builtin::BI__sync_fetch_and_nand_1:
4662  case Builtin::BI__sync_fetch_and_nand_2:
4663  case Builtin::BI__sync_fetch_and_nand_4:
4664  case Builtin::BI__sync_fetch_and_nand_8:
4665  case Builtin::BI__sync_fetch_and_nand_16:
4666  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4667 
4668  // Clang extensions: not overloaded yet.
4669  case Builtin::BI__sync_fetch_and_min:
4670  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4671  case Builtin::BI__sync_fetch_and_max:
4672  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4673  case Builtin::BI__sync_fetch_and_umin:
4674  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4675  case Builtin::BI__sync_fetch_and_umax:
4676  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4677 
4678  case Builtin::BI__sync_add_and_fetch_1:
4679  case Builtin::BI__sync_add_and_fetch_2:
4680  case Builtin::BI__sync_add_and_fetch_4:
4681  case Builtin::BI__sync_add_and_fetch_8:
4682  case Builtin::BI__sync_add_and_fetch_16:
 4683  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
 4684  llvm::Instruction::Add);
 4685  case Builtin::BI__sync_sub_and_fetch_1:
4686  case Builtin::BI__sync_sub_and_fetch_2:
4687  case Builtin::BI__sync_sub_and_fetch_4:
4688  case Builtin::BI__sync_sub_and_fetch_8:
4689  case Builtin::BI__sync_sub_and_fetch_16:
 4690  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
 4691  llvm::Instruction::Sub);
 4692  case Builtin::BI__sync_and_and_fetch_1:
4693  case Builtin::BI__sync_and_and_fetch_2:
4694  case Builtin::BI__sync_and_and_fetch_4:
4695  case Builtin::BI__sync_and_and_fetch_8:
4696  case Builtin::BI__sync_and_and_fetch_16:
 4697  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
 4698  llvm::Instruction::And);
 4699  case Builtin::BI__sync_or_and_fetch_1:
4700  case Builtin::BI__sync_or_and_fetch_2:
4701  case Builtin::BI__sync_or_and_fetch_4:
4702  case Builtin::BI__sync_or_and_fetch_8:
4703  case Builtin::BI__sync_or_and_fetch_16:
4704  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4705  llvm::Instruction::Or);
4706  case Builtin::BI__sync_xor_and_fetch_1:
4707  case Builtin::BI__sync_xor_and_fetch_2:
4708  case Builtin::BI__sync_xor_and_fetch_4:
4709  case Builtin::BI__sync_xor_and_fetch_8:
4710  case Builtin::BI__sync_xor_and_fetch_16:
4711  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4712  llvm::Instruction::Xor);
4713  case Builtin::BI__sync_nand_and_fetch_1:
4714  case Builtin::BI__sync_nand_and_fetch_2:
4715  case Builtin::BI__sync_nand_and_fetch_4:
4716  case Builtin::BI__sync_nand_and_fetch_8:
4717  case Builtin::BI__sync_nand_and_fetch_16:
4718  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4719  llvm::Instruction::And, true);
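      // Note: nand is ~(x & y); the 'true' above makes EmitBinaryAtomicPost
      // invert the And result, so __sync_nand_and_fetch(p, v) yields
      // ~(old & v).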
4720 
4721  case Builtin::BI__sync_val_compare_and_swap_1:
4722  case Builtin::BI__sync_val_compare_and_swap_2:
4723  case Builtin::BI__sync_val_compare_and_swap_4:
4724  case Builtin::BI__sync_val_compare_and_swap_8:
4725  case Builtin::BI__sync_val_compare_and_swap_16:
4726  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4727 
4728  case Builtin::BI__sync_bool_compare_and_swap_1:
4729  case Builtin::BI__sync_bool_compare_and_swap_2:
4730  case Builtin::BI__sync_bool_compare_and_swap_4:
4731  case Builtin::BI__sync_bool_compare_and_swap_8:
4732  case Builtin::BI__sync_bool_compare_and_swap_16:
4733  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4734 
4735  case Builtin::BI__sync_swap_1:
4736  case Builtin::BI__sync_swap_2:
4737  case Builtin::BI__sync_swap_4:
4738  case Builtin::BI__sync_swap_8:
4739  case Builtin::BI__sync_swap_16:
4740  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4741 
4742  case Builtin::BI__sync_lock_test_and_set_1:
4743  case Builtin::BI__sync_lock_test_and_set_2:
4744  case Builtin::BI__sync_lock_test_and_set_4:
4745  case Builtin::BI__sync_lock_test_and_set_8:
4746  case Builtin::BI__sync_lock_test_and_set_16:
4747  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4748 
4749  case Builtin::BI__sync_lock_release_1:
4750  case Builtin::BI__sync_lock_release_2:
4751  case Builtin::BI__sync_lock_release_4:
4752  case Builtin::BI__sync_lock_release_8:
4753  case Builtin::BI__sync_lock_release_16: {
4754  Address Ptr = CheckAtomicAlignment(*this, E);
4755  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4756 
4757  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4758  getContext().getTypeSize(ElTy));
4759  llvm::StoreInst *Store =
4760  Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4761  Store->setAtomic(llvm::AtomicOrdering::Release);
4762  return RValue::get(nullptr);
4763  }
4764 
4765  case Builtin::BI__sync_synchronize: {
 4766  // We assume this is supposed to correspond to a C++11-style
4767  // sequentially-consistent fence (i.e. this is only usable for
4768  // synchronization, not device I/O or anything like that). This intrinsic
4769  // is really badly designed in the sense that in theory, there isn't
4770  // any way to safely use it... but in practice, it mostly works
4771  // to use it with non-atomic loads and stores to get acquire/release
4772  // semantics.
4773  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4774  return RValue::get(nullptr);
4775  }
4776 
4777  case Builtin::BI__builtin_nontemporal_load:
4778  return RValue::get(EmitNontemporalLoad(*this, E));
4779  case Builtin::BI__builtin_nontemporal_store:
4780  return RValue::get(EmitNontemporalStore(*this, E));
4781  case Builtin::BI__c11_atomic_is_lock_free:
4782  case Builtin::BI__atomic_is_lock_free: {
4783  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4784  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4785  // _Atomic(T) is always properly-aligned.
4786  const char *LibCallName = "__atomic_is_lock_free";
4787  CallArgList Args;
4788  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4789  getContext().getSizeType());
4790  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4791  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4792  getContext().VoidPtrTy);
4793  else
4794  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4795  getContext().VoidPtrTy);
4796  const CGFunctionInfo &FuncInfo =
4797  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4798  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4799  llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4800  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4801  ReturnValueSlot(), Args);
4802  }
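      // E.g. on a 64-bit target, __c11_atomic_is_lock_free(sizeof(_Atomic int))
      // lowers roughly to:
      //   call zeroext i1 @__atomic_is_lock_free(i64 4, ptr null)
      // with the null pointer standing in for a suitably aligned object.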
4803 
4804  case Builtin::BI__atomic_test_and_set: {
 4805  // Look at the argument type to determine whether this is a volatile
 4806  // operation; we cannot use the parameter type, which is always volatile.
4807  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4808  bool Volatile =
4809  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4810 
4811  Address Ptr =
 4812  EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
 4813 
4814  Value *NewVal = Builder.getInt8(1);
4815  Value *Order = EmitScalarExpr(E->getArg(1));
4816  if (isa<llvm::ConstantInt>(Order)) {
4817  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4818  AtomicRMWInst *Result = nullptr;
4819  switch (ord) {
4820  case 0: // memory_order_relaxed
4821  default: // invalid order
4822  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4823  llvm::AtomicOrdering::Monotonic);
4824  break;
4825  case 1: // memory_order_consume
4826  case 2: // memory_order_acquire
4827  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4828  llvm::AtomicOrdering::Acquire);
4829  break;
4830  case 3: // memory_order_release
4831  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4832  llvm::AtomicOrdering::Release);
4833  break;
4834  case 4: // memory_order_acq_rel
4835 
4836  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4837  llvm::AtomicOrdering::AcquireRelease);
4838  break;
4839  case 5: // memory_order_seq_cst
4840  Result = Builder.CreateAtomicRMW(
4841  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4842  llvm::AtomicOrdering::SequentiallyConsistent);
4843  break;
4844  }
4845  Result->setVolatile(Volatile);
4846  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4847  }
4848 
4849  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4850 
4851  llvm::BasicBlock *BBs[5] = {
4852  createBasicBlock("monotonic", CurFn),
4853  createBasicBlock("acquire", CurFn),
4854  createBasicBlock("release", CurFn),
4855  createBasicBlock("acqrel", CurFn),
4856  createBasicBlock("seqcst", CurFn)
4857  };
4858  llvm::AtomicOrdering Orders[5] = {
4859  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4860  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4861  llvm::AtomicOrdering::SequentiallyConsistent};
4862 
4863  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4864  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4865 
4866  Builder.SetInsertPoint(ContBB);
4867  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4868 
4869  for (unsigned i = 0; i < 5; ++i) {
4870  Builder.SetInsertPoint(BBs[i]);
4871  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4872  Ptr, NewVal, Orders[i]);
4873  RMW->setVolatile(Volatile);
4874  Result->addIncoming(RMW, BBs[i]);
4875  Builder.CreateBr(ContBB);
4876  }
4877 
4878  SI->addCase(Builder.getInt32(0), BBs[0]);
4879  SI->addCase(Builder.getInt32(1), BBs[1]);
4880  SI->addCase(Builder.getInt32(2), BBs[1]);
4881  SI->addCase(Builder.getInt32(3), BBs[2]);
4882  SI->addCase(Builder.getInt32(4), BBs[3]);
4883  SI->addCase(Builder.getInt32(5), BBs[4]);
4884 
4885  Builder.SetInsertPoint(ContBB);
4886  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4887  }
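      // With a non-constant ordering, the code above dispatches through a
      // switch with one atomic xchg per ordering; invalid ordering values
      // take the default edge to the monotonic (relaxed) block.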
4888 
4889  case Builtin::BI__atomic_clear: {
4890  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4891  bool Volatile =
4892  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4893 
 4893 
 4894  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
 4895  Ptr = Ptr.withElementType(Int8Ty);
4896  Value *NewVal = Builder.getInt8(0);
4897  Value *Order = EmitScalarExpr(E->getArg(1));
4898  if (isa<llvm::ConstantInt>(Order)) {
4899  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4900  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4901  switch (ord) {
4902  case 0: // memory_order_relaxed
4903  default: // invalid order
4904  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4905  break;
4906  case 3: // memory_order_release
4907  Store->setOrdering(llvm::AtomicOrdering::Release);
4908  break;
4909  case 5: // memory_order_seq_cst
4910  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4911  break;
4912  }
4913  return RValue::get(nullptr);
4914  }
4915 
4916  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4917 
4918  llvm::BasicBlock *BBs[3] = {
4919  createBasicBlock("monotonic", CurFn),
4920  createBasicBlock("release", CurFn),
4921  createBasicBlock("seqcst", CurFn)
4922  };
4923  llvm::AtomicOrdering Orders[3] = {
4924  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4925  llvm::AtomicOrdering::SequentiallyConsistent};
4926 
4927  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4928  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4929 
4930  for (unsigned i = 0; i < 3; ++i) {
4931  Builder.SetInsertPoint(BBs[i]);
4932  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4933  Store->setOrdering(Orders[i]);
4934  Builder.CreateBr(ContBB);
4935  }
4936 
4937  SI->addCase(Builder.getInt32(0), BBs[0]);
4938  SI->addCase(Builder.getInt32(3), BBs[1]);
4939  SI->addCase(Builder.getInt32(5), BBs[2]);
4940 
4941  Builder.SetInsertPoint(ContBB);
4942  return RValue::get(nullptr);
4943  }
4944 
4945  case Builtin::BI__atomic_thread_fence:
4946  case Builtin::BI__atomic_signal_fence:
4947  case Builtin::BI__c11_atomic_thread_fence:
4948  case Builtin::BI__c11_atomic_signal_fence: {
4949  llvm::SyncScope::ID SSID;
4950  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4951  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4952  SSID = llvm::SyncScope::SingleThread;
4953  else
4954  SSID = llvm::SyncScope::System;
4955  Value *Order = EmitScalarExpr(E->getArg(0));
4956  if (isa<llvm::ConstantInt>(Order)) {
4957  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4958  switch (ord) {
4959  case 0: // memory_order_relaxed
4960  default: // invalid order
4961  break;
4962  case 1: // memory_order_consume
4963  case 2: // memory_order_acquire
4964  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4965  break;
4966  case 3: // memory_order_release
4967  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4968  break;
4969  case 4: // memory_order_acq_rel
4970  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4971  break;
4972  case 5: // memory_order_seq_cst
4973  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4974  break;
4975  }
4976  return RValue::get(nullptr);
4977  }
4978 
4979  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4980  AcquireBB = createBasicBlock("acquire", CurFn);
4981  ReleaseBB = createBasicBlock("release", CurFn);
4982  AcqRelBB = createBasicBlock("acqrel", CurFn);
4983  SeqCstBB = createBasicBlock("seqcst", CurFn);
4984  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4985 
4986  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4987  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4988 
4989  Builder.SetInsertPoint(AcquireBB);
4990  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4991  Builder.CreateBr(ContBB);
4992  SI->addCase(Builder.getInt32(1), AcquireBB);
4993  SI->addCase(Builder.getInt32(2), AcquireBB);
4994 
4995  Builder.SetInsertPoint(ReleaseBB);
4996  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4997  Builder.CreateBr(ContBB);
4998  SI->addCase(Builder.getInt32(3), ReleaseBB);
4999 
5000  Builder.SetInsertPoint(AcqRelBB);
5001  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5002  Builder.CreateBr(ContBB);
5003  SI->addCase(Builder.getInt32(4), AcqRelBB);
5004 
5005  Builder.SetInsertPoint(SeqCstBB);
5006  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5007  Builder.CreateBr(ContBB);
5008  SI->addCase(Builder.getInt32(5), SeqCstBB);
5009 
5010  Builder.SetInsertPoint(ContBB);
5011  return RValue::get(nullptr);
5012  }
5013 
5014  case Builtin::BI__builtin_signbit:
5015  case Builtin::BI__builtin_signbitf:
5016  case Builtin::BI__builtin_signbitl: {
5017  return RValue::get(
5018  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5019  ConvertType(E->getType())));
5020  }
5021  case Builtin::BI__warn_memset_zero_len:
5022  return RValue::getIgnored();
5023  case Builtin::BI__annotation: {
 5024  // Re-encode each wide string to UTF-8 and make an MDString.
 5025  SmallVector<Metadata *, 1> Strings;
 5026  for (const Expr *Arg : E->arguments()) {
5027  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5028  assert(Str->getCharByteWidth() == 2);
5029  StringRef WideBytes = Str->getBytes();
5030  std::string StrUtf8;
5031  if (!convertUTF16ToUTF8String(
5032  ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5033  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5034  continue;
5035  }
5036  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5037  }
5038 
 5039  // Build an MDTuple of MDStrings and emit the intrinsic call.
5040  llvm::Function *F =
5041  CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5042  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5043  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5044  return RValue::getIgnored();
5045  }
5046  case Builtin::BI__builtin_annotation: {
5047  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5048  llvm::Function *F =
5049  CGM.getIntrinsic(llvm::Intrinsic::annotation,
5050  {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5051 
5052  // Get the annotation string, go through casts. Sema requires this to be a
 5053  // non-wide string literal, possibly wrapped in casts, so the cast<> is safe.
5054  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5055  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5056  return RValue::get(
5057  EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5058  }
5059  case Builtin::BI__builtin_addcb:
5060  case Builtin::BI__builtin_addcs:
5061  case Builtin::BI__builtin_addc:
5062  case Builtin::BI__builtin_addcl:
5063  case Builtin::BI__builtin_addcll:
5064  case Builtin::BI__builtin_subcb:
5065  case Builtin::BI__builtin_subcs:
5066  case Builtin::BI__builtin_subc:
5067  case Builtin::BI__builtin_subcl:
5068  case Builtin::BI__builtin_subcll: {
5069 
5070  // We translate all of these builtins from expressions of the form:
5071  // int x = ..., y = ..., carryin = ..., carryout, result;
5072  // result = __builtin_addc(x, y, carryin, &carryout);
5073  //
5074  // to LLVM IR of the form:
5075  //
5076  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5077  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5078  // %carry1 = extractvalue {i32, i1} %tmp1, 1
5079  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5080  // i32 %carryin)
5081  // %result = extractvalue {i32, i1} %tmp2, 0
5082  // %carry2 = extractvalue {i32, i1} %tmp2, 1
5083  // %tmp3 = or i1 %carry1, %carry2
5084  // %tmp4 = zext i1 %tmp3 to i32
5085  // store i32 %tmp4, i32* %carryout
5086 
5087  // Scalarize our inputs.
5088  llvm::Value *X = EmitScalarExpr(E->getArg(0));
5089  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5090  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5091  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5092 
5093  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5094  llvm::Intrinsic::ID IntrinsicId;
5095  switch (BuiltinID) {
5096  default: llvm_unreachable("Unknown multiprecision builtin id.");
5097  case Builtin::BI__builtin_addcb:
5098  case Builtin::BI__builtin_addcs:
5099  case Builtin::BI__builtin_addc:
5100  case Builtin::BI__builtin_addcl:
5101  case Builtin::BI__builtin_addcll:
5102  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5103  break;
5104  case Builtin::BI__builtin_subcb:
5105  case Builtin::BI__builtin_subcs:
5106  case Builtin::BI__builtin_subc:
5107  case Builtin::BI__builtin_subcl:
5108  case Builtin::BI__builtin_subcll:
5109  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5110  break;
5111  }
5112 
5113  // Construct our resulting LLVM IR expression.
5114  llvm::Value *Carry1;
5115  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5116  X, Y, Carry1);
5117  llvm::Value *Carry2;
5118  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5119  Sum1, Carryin, Carry2);
5120  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5121  X->getType());
5122  Builder.CreateStore(CarryOut, CarryOutPtr);
5123  return RValue::get(Sum2);
5124  }
5125 
5126  case Builtin::BI__builtin_add_overflow:
5127  case Builtin::BI__builtin_sub_overflow:
5128  case Builtin::BI__builtin_mul_overflow: {
5129  const clang::Expr *LeftArg = E->getArg(0);
5130  const clang::Expr *RightArg = E->getArg(1);
5131  const clang::Expr *ResultArg = E->getArg(2);
5132 
5133  clang::QualType ResultQTy =
5134  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5135 
5136  WidthAndSignedness LeftInfo =
5137  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5138  WidthAndSignedness RightInfo =
5139  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5140  WidthAndSignedness ResultInfo =
5141  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5142 
5143  // Handle mixed-sign multiplication as a special case, because adding
5144  // runtime or backend support for our generic irgen would be too expensive.
5145  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5146  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5147  RightInfo, ResultArg, ResultQTy,
5148  ResultInfo);
5149 
5150  if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5151  ResultInfo))
 5152  return EmitCheckedUnsignedMultiplySignedResult(
 5153  *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5154  ResultInfo);
5155 
5156  WidthAndSignedness EncompassingInfo =
5157  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5158 
5159  llvm::Type *EncompassingLLVMTy =
5160  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5161 
5162  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5163 
5164  llvm::Intrinsic::ID IntrinsicId;
5165  switch (BuiltinID) {
5166  default:
5167  llvm_unreachable("Unknown overflow builtin id.");
5168  case Builtin::BI__builtin_add_overflow:
5169  IntrinsicId = EncompassingInfo.Signed
5170  ? llvm::Intrinsic::sadd_with_overflow
5171  : llvm::Intrinsic::uadd_with_overflow;
5172  break;
5173  case Builtin::BI__builtin_sub_overflow:
5174  IntrinsicId = EncompassingInfo.Signed
5175  ? llvm::Intrinsic::ssub_with_overflow
5176  : llvm::Intrinsic::usub_with_overflow;
5177  break;
5178  case Builtin::BI__builtin_mul_overflow:
5179  IntrinsicId = EncompassingInfo.Signed
5180  ? llvm::Intrinsic::smul_with_overflow
5181  : llvm::Intrinsic::umul_with_overflow;
5182  break;
5183  }
5184 
5185  llvm::Value *Left = EmitScalarExpr(LeftArg);
5186  llvm::Value *Right = EmitScalarExpr(RightArg);
5187  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5188 
5189  // Extend each operand to the encompassing type.
5190  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5191  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5192 
5193  // Perform the operation on the extended values.
5194  llvm::Value *Overflow, *Result;
5195  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5196 
5197  if (EncompassingInfo.Width > ResultInfo.Width) {
5198  // The encompassing type is wider than the result type, so we need to
5199  // truncate it.
5200  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5201 
5202  // To see if the truncation caused an overflow, we will extend
5203  // the result and then compare it to the original result.
5204  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5205  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5206  llvm::Value *TruncationOverflow =
5207  Builder.CreateICmpNE(Result, ResultTruncExt);
5208 
5209  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5210  Result = ResultTrunc;
5211  }
5212 
5213  // Finally, store the result using the pointer.
5214  bool isVolatile =
5215  ResultArg->getType()->getPointeeType().isVolatileQualified();
5216  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5217 
5218  return RValue::get(Overflow);
5219  }
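      // Worked example: for 'long long' operands and an 'int' result,
      // __builtin_add_overflow lowers roughly to:
      //   %r   = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
      //   %sum = extractvalue { i64, i1 } %r, 0
      //   %ov1 = extractvalue { i64, i1 } %r, 1
      //   %t   = trunc i64 %sum to i32
      //   %ext = sext i32 %t to i64
      //   %ov2 = icmp ne i64 %sum, %ext   ; did the truncation lose bits?
      //   %ov  = or i1 %ov1, %ov2         ; returned; %t is stored to *result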
5220 
5221  case Builtin::BI__builtin_uadd_overflow:
5222  case Builtin::BI__builtin_uaddl_overflow:
5223  case Builtin::BI__builtin_uaddll_overflow:
5224  case Builtin::BI__builtin_usub_overflow:
5225  case Builtin::BI__builtin_usubl_overflow:
5226  case Builtin::BI__builtin_usubll_overflow:
5227  case Builtin::BI__builtin_umul_overflow:
5228  case Builtin::BI__builtin_umull_overflow:
5229  case Builtin::BI__builtin_umulll_overflow:
5230  case Builtin::BI__builtin_sadd_overflow:
5231  case Builtin::BI__builtin_saddl_overflow:
5232  case Builtin::BI__builtin_saddll_overflow:
5233  case Builtin::BI__builtin_ssub_overflow:
5234  case Builtin::BI__builtin_ssubl_overflow:
5235  case Builtin::BI__builtin_ssubll_overflow:
5236  case Builtin::BI__builtin_smul_overflow:
5237  case Builtin::BI__builtin_smull_overflow:
5238  case Builtin::BI__builtin_smulll_overflow: {
5239 
5240  // We translate all of these builtins directly to the relevant llvm IR node.
5241 
5242  // Scalarize our inputs.
5243  llvm::Value *X = EmitScalarExpr(E->getArg(0));
5244  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5245  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5246 
5247  // Decide which of the overflow intrinsics we are lowering to:
5248  llvm::Intrinsic::ID IntrinsicId;
5249  switch (BuiltinID) {
5250  default: llvm_unreachable("Unknown overflow builtin id.");
5251  case Builtin::BI__builtin_uadd_overflow:
5252  case Builtin::BI__builtin_uaddl_overflow:
5253  case Builtin::BI__builtin_uaddll_overflow:
5254  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5255  break;
5256  case Builtin::BI__builtin_usub_overflow:
5257  case Builtin::BI__builtin_usubl_overflow:
5258  case Builtin::BI__builtin_usubll_overflow:
5259  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5260  break;
5261  case Builtin::BI__builtin_umul_overflow:
5262  case Builtin::BI__builtin_umull_overflow:
5263  case Builtin::BI__builtin_umulll_overflow:
5264  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5265  break;
5266  case Builtin::BI__builtin_sadd_overflow:
5267  case Builtin::BI__builtin_saddl_overflow:
5268  case Builtin::BI__builtin_saddll_overflow:
5269  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5270  break;
5271  case Builtin::BI__builtin_ssub_overflow:
5272  case Builtin::BI__builtin_ssubl_overflow:
5273  case Builtin::BI__builtin_ssubll_overflow:
5274  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5275  break;
5276  case Builtin::BI__builtin_smul_overflow:
5277  case Builtin::BI__builtin_smull_overflow:
5278  case Builtin::BI__builtin_smulll_overflow:
5279  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5280  break;
5281  }
 5282 
5284  llvm::Value *Carry;
5285  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5286  Builder.CreateStore(Sum, SumOutPtr);
5287 
5288  return RValue::get(Carry);
5289  }
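      // E.g. __builtin_sadd_overflow(x, y, &sum) lowers roughly to:
      //   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
      // with field 0 stored to *sum and field 1 returned as the carry flag.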
5290  case Builtin::BIaddressof:
5291  case Builtin::BI__addressof:
5292  case Builtin::BI__builtin_addressof:
5293  return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5294  case Builtin::BI__builtin_function_start:
5295  return RValue::get(CGM.GetFunctionStart(
5296  E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5297  case Builtin::BI__builtin_operator_new:
5298  return EmitBuiltinNewDeleteCall(
5299  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5300  case Builtin::BI__builtin_operator_delete:
5301  EmitBuiltinNewDeleteCall(
5302  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5303  return RValue::get(nullptr);
5304 
5305  case Builtin::BI__builtin_is_aligned:
5306  return EmitBuiltinIsAligned(E);
5307  case Builtin::BI__builtin_align_up:
5308  return EmitBuiltinAlignTo(E, true);
5309  case Builtin::BI__builtin_align_down:
5310  return EmitBuiltinAlignTo(E, false);
5311 
5312  case Builtin::BI__noop:
5313  // __noop always evaluates to an integer literal zero.
5314  return RValue::get(ConstantInt::get(IntTy, 0));
5315  case Builtin::BI__builtin_call_with_static_chain: {
5316  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5317  const Expr *Chain = E->getArg(1);
5318  return EmitCall(Call->getCallee()->getType(),
5319  EmitCallee(Call->getCallee()), Call, ReturnValue,
5320  EmitScalarExpr(Chain));
5321  }
5322  case Builtin::BI_InterlockedExchange8:
5323  case Builtin::BI_InterlockedExchange16:
5324  case Builtin::BI_InterlockedExchange:
5325  case Builtin::BI_InterlockedExchangePointer:
5326  return RValue::get(
5327  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5328  case Builtin::BI_InterlockedCompareExchangePointer:
5329  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5330  llvm::Type *RTy;
5331  llvm::IntegerType *IntType = IntegerType::get(
5332  getLLVMContext(), getContext().getTypeSize(E->getType()));
5333 
5334  Address DestAddr = CheckAtomicAlignment(*this, E);
5335 
5336  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5337  RTy = Exchange->getType();
5338  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5339 
5340  llvm::Value *Comparand =
5341  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5342 
5343  auto Ordering =
5344  BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5345  AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5346 
5347  auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5348  Ordering, Ordering);
5349  Result->setVolatile(true);
5350 
 5351  return RValue::get(Builder.CreateIntToPtr(
 5352  Builder.CreateExtractValue(Result, 0), RTy));
5354  }
5355  case Builtin::BI_InterlockedCompareExchange8:
5356  case Builtin::BI_InterlockedCompareExchange16:
5357  case Builtin::BI_InterlockedCompareExchange:
5358  case Builtin::BI_InterlockedCompareExchange64:
5359  return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5360  case Builtin::BI_InterlockedIncrement16:
5361  case Builtin::BI_InterlockedIncrement:
5362  return RValue::get(
5363  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5364  case Builtin::BI_InterlockedDecrement16:
5365  case Builtin::BI_InterlockedDecrement:
5366  return RValue::get(
5367  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5368  case Builtin::BI_InterlockedAnd8:
5369  case Builtin::BI_InterlockedAnd16:
5370  case Builtin::BI_InterlockedAnd:
5371  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5372  case Builtin::BI_InterlockedExchangeAdd8:
5373  case Builtin::BI_InterlockedExchangeAdd16:
5374  case Builtin::BI_InterlockedExchangeAdd:
5375  return RValue::get(
5376  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5377  case Builtin::BI_InterlockedExchangeSub8:
5378  case Builtin::BI_InterlockedExchangeSub16:
5379  case Builtin::BI_InterlockedExchangeSub:
5380  return RValue::get(
5381  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5382  case Builtin::BI_InterlockedOr8:
5383  case Builtin::BI_InterlockedOr16:
5384  case Builtin::BI_InterlockedOr:
5385  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5386  case Builtin::BI_InterlockedXor8:
5387  case Builtin::BI_InterlockedXor16:
5388  case Builtin::BI_InterlockedXor:
5389  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5390 
5391  case Builtin::BI_bittest64:
5392  case Builtin::BI_bittest:
5393  case Builtin::BI_bittestandcomplement64:
5394  case Builtin::BI_bittestandcomplement:
5395  case Builtin::BI_bittestandreset64:
5396  case Builtin::BI_bittestandreset:
5397  case Builtin::BI_bittestandset64:
5398  case Builtin::BI_bittestandset:
5399  case Builtin::BI_interlockedbittestandreset:
5400  case Builtin::BI_interlockedbittestandreset64:
5401  case Builtin::BI_interlockedbittestandset64:
5402  case Builtin::BI_interlockedbittestandset:
5403  case Builtin::BI_interlockedbittestandset_acq:
5404  case Builtin::BI_interlockedbittestandset_rel:
5405  case Builtin::BI_interlockedbittestandset_nf:
5406  case Builtin::BI_interlockedbittestandreset_acq:
5407  case Builtin::BI_interlockedbittestandreset_rel:
5408  case Builtin::BI_interlockedbittestandreset_nf:
5409  return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5410 
5411  // These builtins exist to emit regular volatile loads and stores not
5412  // affected by the -fms-volatile setting.
5413  case Builtin::BI__iso_volatile_load8:
5414  case Builtin::BI__iso_volatile_load16:
5415  case Builtin::BI__iso_volatile_load32:
5416  case Builtin::BI__iso_volatile_load64:
5417  return RValue::get(EmitISOVolatileLoad(*this, E));
5418  case Builtin::BI__iso_volatile_store8:
5419  case Builtin::BI__iso_volatile_store16:
5420  case Builtin::BI__iso_volatile_store32:
5421  case Builtin::BI__iso_volatile_store64:
5422  return RValue::get(EmitISOVolatileStore(*this, E));
5423 
5424  case Builtin::BI__builtin_ptrauth_auth:
5425  case Builtin::BI__builtin_ptrauth_auth_and_resign:
5426  case Builtin::BI__builtin_ptrauth_blend_discriminator:
5427  case Builtin::BI__builtin_ptrauth_sign_generic_data:
5428  case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5429  case Builtin::BI__builtin_ptrauth_strip: {
5430  // Emit the arguments.
 5431  SmallVector<llvm::Value *, 5> Args;
 5432  for (auto argExpr : E->arguments())
5433  Args.push_back(EmitScalarExpr(argExpr));
5434 
5435  // Cast the value to intptr_t, saving its original type.
5436  llvm::Type *OrigValueType = Args[0]->getType();
5437  if (OrigValueType->isPointerTy())
5438  Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5439 
5440  switch (BuiltinID) {
5441  case Builtin::BI__builtin_ptrauth_auth_and_resign:
5442  if (Args[4]->getType()->isPointerTy())
5443  Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5444  LLVM_FALLTHROUGH;
5445 
5446  case Builtin::BI__builtin_ptrauth_auth:
5447  case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5448  if (Args[2]->getType()->isPointerTy())
5449  Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5450  break;
5451 
5452  case Builtin::BI__builtin_ptrauth_sign_generic_data:
5453  if (Args[1]->getType()->isPointerTy())
5454  Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5455  break;
5456 
5457  case Builtin::BI__builtin_ptrauth_blend_discriminator:
5458  case Builtin::BI__builtin_ptrauth_strip:
5459  break;
5460  }
5461 
5462  // Call the intrinsic.
5463  auto IntrinsicID = [&]() -> unsigned {
5464  switch (BuiltinID) {
5465  case Builtin::BI__builtin_ptrauth_auth:
5466  return llvm::Intrinsic::ptrauth_auth;
5467  case Builtin::BI__builtin_ptrauth_auth_and_resign:
5468  return llvm::Intrinsic::ptrauth_resign;
5469  case Builtin::BI__builtin_ptrauth_blend_discriminator:
5470  return llvm::Intrinsic::ptrauth_blend;
5471  case Builtin::BI__builtin_ptrauth_sign_generic_data:
5472  return llvm::Intrinsic::ptrauth_sign_generic;
5473  case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5474  return llvm::Intrinsic::ptrauth_sign;
5475  case Builtin::BI__builtin_ptrauth_strip:
 5476  return llvm::Intrinsic::ptrauth_strip;
 5477  }
5478  llvm_unreachable("bad ptrauth intrinsic");
5479  }();
5480  auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5481  llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5482 
5483  if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5484  BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5485  OrigValueType->isPointerTy()) {
5486  Result = Builder.CreateIntToPtr(Result, OrigValueType);
5487  }
5488  return RValue::get(Result);
5489  }
5490 
5491  case Builtin::BI__exception_code:
5492  case Builtin::BI_exception_code:
5493  return RValue::get(EmitSEHExceptionCode());
5494  case Builtin::BI__exception_info:
5495  case Builtin::BI_exception_info:
5496  return RValue::get(EmitSEHExceptionInfo());
5497  case Builtin::BI__abnormal_termination:
5498  case Builtin::BI_abnormal_termination:
5499  return RValue::get(EmitSEHAbnormalTermination());
5500  case Builtin::BI_setjmpex:
5501  if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5502  E->getArg(0)->getType()->isPointerType())
5503  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5504  break;
5505  case Builtin::BI_setjmp:
5506  if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5507  E->getArg(0)->getType()->isPointerType()) {
5508  if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5509  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5510  else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5511  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5512  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5513  }
5514  break;
5515 
5516  // C++ std:: builtins.
5517  case Builtin::BImove:
5518  case Builtin::BImove_if_noexcept:
5519  case Builtin::BIforward:
5520  case Builtin::BIforward_like:
5521  case Builtin::BIas_const:
5522  return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5523  case Builtin::BI__GetExceptionInfo: {
5524  if (llvm::GlobalVariable *GV =
5525  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5526  return RValue::get(GV);
5527  break;
5528  }
5529 
5530  case Builtin::BI__fastfail:
5531  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5532 
5533  case Builtin::BI__builtin_coro_id:
5534  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5535  case Builtin::BI__builtin_coro_promise:
5536  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5537  case Builtin::BI__builtin_coro_resume:
5538  EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5539  return RValue::get(nullptr);
5540  case Builtin::BI__builtin_coro_frame:
5541  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5542  case Builtin::BI__builtin_coro_noop:
5543  return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5544  case Builtin::BI__builtin_coro_free:
5545  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5546  case Builtin::BI__builtin_coro_destroy:
5547  EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5548  return RValue::get(nullptr);
5549  case Builtin::BI__builtin_coro_done:
5550  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5551  case Builtin::BI__builtin_coro_alloc:
5552  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5553  case Builtin::BI__builtin_coro_begin:
5554  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5555  case Builtin::BI__builtin_coro_end:
5556  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5557  case Builtin::BI__builtin_coro_suspend:
5558  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5559  case Builtin::BI__builtin_coro_size:
5560  return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5561  case Builtin::BI__builtin_coro_align:
5562  return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5563 
5564  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5565  case Builtin::BIread_pipe:
5566  case Builtin::BIwrite_pipe: {
5567  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5568  *Arg1 = EmitScalarExpr(E->getArg(1));
5569  CGOpenCLRuntime OpenCLRT(CGM);
5570  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5571  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5572 
5573  // Type of the generic packet parameter.
5574  unsigned GenericAS =
5575  getContext().getTargetAddressSpace(LangAS::opencl_generic);
5576  llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5577 
 5578  // Determine which overloaded version to generate the call for.
5579  if (2U == E->getNumArgs()) {
5580  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5581  : "__write_pipe_2";
 5582  // Create a generic function type so the call can be made with any builtin
 5583  // or user-defined type.
5584  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5585  llvm::FunctionType *FTy = llvm::FunctionType::get(
5586  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5587  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5588  return RValue::get(
5589  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5590  {Arg0, BCast, PacketSize, PacketAlign}));
5591  } else {
5592  assert(4 == E->getNumArgs() &&
5593  "Illegal number of parameters to pipe function");
5594  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5595  : "__write_pipe_4";
5596 
5597  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5598  Int32Ty, Int32Ty};
5599  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5600  *Arg3 = EmitScalarExpr(E->getArg(3));
5601  llvm::FunctionType *FTy = llvm::FunctionType::get(
5602  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5603  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5604  // We know the third argument is an integer type, but we may need to cast
5605  // it to i32.
5606  if (Arg2->getType() != Int32Ty)
5607  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5608  return RValue::get(
5609  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5610  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5611  }
5612  }
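      // E.g. the two-argument form read_pipe(p, &v) lowers roughly to:
      //   %r = call i32 @__read_pipe_2(ptr %p, ptr %v.gen, i32 %size, i32 %align)
      // where %v.gen is the packet pointer cast to the generic address space.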
 5613  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5614  // functions
5615  case Builtin::BIreserve_read_pipe:
5616  case Builtin::BIreserve_write_pipe:
5617  case Builtin::BIwork_group_reserve_read_pipe:
5618  case Builtin::BIwork_group_reserve_write_pipe:
5619  case Builtin::BIsub_group_reserve_read_pipe:
5620  case Builtin::BIsub_group_reserve_write_pipe: {
 5621  // Compose the mangled name for the function.
5622  const char *Name;
5623  if (BuiltinID == Builtin::BIreserve_read_pipe)
5624  Name = "__reserve_read_pipe";
5625  else if (BuiltinID == Builtin::BIreserve_write_pipe)
5626  Name = "__reserve_write_pipe";
5627  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5628  Name = "__work_group_reserve_read_pipe";
5629  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5630  Name = "__work_group_reserve_write_pipe";
5631  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5632  Name = "__sub_group_reserve_read_pipe";
5633  else
5634  Name = "__sub_group_reserve_write_pipe";
5635 
5636  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5637  *Arg1 = EmitScalarExpr(E->getArg(1));
5638  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5639  CGOpenCLRuntime OpenCLRT(CGM);
5640  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5641  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5642 
5643  // Build the generic function prototype.
5644  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5645  llvm::FunctionType *FTy = llvm::FunctionType::get(
5646  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5647  // We know the second argument is an integer type, but we may need to cast
5648  // it to i32.
5649  if (Arg1->getType() != Int32Ty)
5650  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5651  return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5652  {Arg0, Arg1, PacketSize, PacketAlign}));
5653  }
5654  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5655  // functions
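 // For example (sketch), commit_read_pipe(p, rid) lowers to
 //   call void @__commit_read_pipe(%pipe %p, %reserve_id %rid, i32 size, i32 align)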
5656  case Builtin::BIcommit_read_pipe:
5657  case Builtin::BIcommit_write_pipe:
5658  case Builtin::BIwork_group_commit_read_pipe:
5659  case Builtin::BIwork_group_commit_write_pipe:
5660  case Builtin::BIsub_group_commit_read_pipe:
5661  case Builtin::BIsub_group_commit_write_pipe: {
5662  const char *Name;
5663  if (BuiltinID == Builtin::BIcommit_read_pipe)
5664  Name = "__commit_read_pipe";
5665  else if (BuiltinID == Builtin::BIcommit_write_pipe)
5666  Name = "__commit_write_pipe";
5667  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5668  Name = "__work_group_commit_read_pipe";
5669  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5670  Name = "__work_group_commit_write_pipe";
5671  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5672  Name = "__sub_group_commit_read_pipe";
5673  else
5674  Name = "__sub_group_commit_write_pipe";
5675 
5676  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5677  *Arg1 = EmitScalarExpr(E->getArg(1));
5678  CGOpenCLRuntime OpenCLRT(CGM);
5679  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5680  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5681 
5682  // Build the generic function prototype.
5683  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5684  llvm::FunctionType *FTy =
5685  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5686  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5687 
5688  return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5689  {Arg0, Arg1, PacketSize, PacketAlign}));
5690  }
5691  // OpenCL v2.0 s6.13.16.4 - Built-in pipe query functions
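 // For example (sketch), get_pipe_num_packets(p) on a read-only pipe lowers to
 //   call i32 @__get_pipe_num_packets_ro(%pipe %p, i32 size, i32 align)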
5692  case Builtin::BIget_pipe_num_packets:
5693  case Builtin::BIget_pipe_max_packets: {
5694  const char *BaseName;
5695  const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5696  if (BuiltinID == Builtin::BIget_pipe_num_packets)
5697  BaseName = "__get_pipe_num_packets";
5698  else
5699  BaseName = "__get_pipe_max_packets";
5700  std::string Name = std::string(BaseName) +
5701  std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5702 
5703  // Build the generic function prototype.
5704  Value *Arg0 = EmitScalarExpr(E->getArg(0));
5705  CGOpenCLRuntime OpenCLRT(CGM);
5706  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5707  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5708  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5709  llvm::FunctionType *FTy = llvm::FunctionType::get(
5710  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5711 
5712  return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5713  {Arg0, PacketSize, PacketAlign}));
5714  }
5715 
5716  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
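 // For example (sketch; address-space numbering is target-specific),
 // to_global(p) lowers to
 //   call ptr @__to_global(ptr %p)   ; %p cast to the generic address space
 // with the result cast back to the expected pointer type.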
5717  case Builtin::BIto_global:
5718  case Builtin::BIto_local:
5719  case Builtin::BIto_private: {
5720  auto Arg0 = EmitScalarExpr(E->getArg(0));
5721  auto NewArgT = llvm::PointerType::get(
5722  getLLVMContext(),
5723  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5724  auto NewRetT = llvm::PointerType::get(
5725  getLLVMContext(),
5726  CGM.getContext().getTargetAddressSpace(
5727  E->getType()->getPointeeType().getAddressSpace()));
5728  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5729  llvm::Value *NewArg;
5730  if (Arg0->getType()->getPointerAddressSpace() !=
5731  NewArgT->getPointerAddressSpace())
5732  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5733  else
5734  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5735  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5736  auto NewCall =
5737  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5738  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5739  ConvertType(E->getType())));
5740  }
5741 
5742  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5743  // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5744  // The code below expands the builtin call to a call to one of the following
5745  // functions that an OpenCL runtime library will have to provide:
5746  // __enqueue_kernel_basic
5747  // __enqueue_kernel_varargs
5748  // __enqueue_kernel_basic_events
5749  // __enqueue_kernel_events_varargs
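 // Illustratively (a sketch of the dispatch below, by argument shape):
 //   enqueue_kernel(q, flags, ndr, block)              -> __enqueue_kernel_basic
 //   enqueue_kernel(q, flags, ndr, block, sizes...)    -> __enqueue_kernel_varargs
 //   enqueue_kernel(q, flags, ndr, nev, wl, ev, block) -> __enqueue_kernel_basic_events
 //   enqueue_kernel(q, flags, ndr, nev, wl, ev, block, sizes...)
 //                                                     -> __enqueue_kernel_events_varargs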
5750  case Builtin::BIenqueue_kernel: {
5751  StringRef Name; // Generated function call name
5752  unsigned NumArgs = E->getNumArgs();
5753 
5754  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5755  llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5756  getContext().getTargetAddressSpace(LangAS::opencl_generic));
5757 
5758  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5759  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5760  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5761  llvm::Value *Range = NDRangeL.getAddress(*this).emitRawPointer(*this);
5762  llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5763 
5764  if (NumArgs == 4) {
5765  // The most basic form of the call with parameters:
5766  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5767  Name = "__enqueue_kernel_basic";
5768  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5769  GenericVoidPtrTy};
5770  llvm::FunctionType *FTy = llvm::FunctionType::get(
5771  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5772 
5773  auto Info =
5774  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5775  llvm::Value *Kernel =
5776  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5777  llvm::Value *Block =
5778  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5779 
5780  AttrBuilder B(Builder.getContext());
5781  B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5782  llvm::AttributeList ByValAttrSet =
5783  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5784 
5785  auto RTCall =
5786  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5787  {Queue, Flags, Range, Kernel, Block});
5788  RTCall->setAttributes(ByValAttrSet);
5789  return RValue::get(RTCall);
5790  }
5791  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5792 
5793  // Create a temporary array to hold the sizes of local pointer arguments
5794  // for the block. \p First is the position of the first size argument.
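 // For a hypothetical call enqueue_kernel(q, flags, ndr, block, 16u, 32u),
 // First == 4 and the temporary array holds {16, 32} as size_t values, with
 // ElemPtr pointing at its first element.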
5795  auto CreateArrayForSizeVar = [=](unsigned First)
5796  -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5797  llvm::APInt ArraySize(32, NumArgs - First);
5798  QualType SizeArrayTy = getContext().getConstantArrayType(
5799  getContext().getSizeType(), ArraySize, nullptr,
5800  ArraySizeModifier::Normal,
5801  /*IndexTypeQuals=*/0);
5802  auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5803  llvm::Value *TmpPtr = Tmp.getPointer();
5804  llvm::Value *TmpSize = EmitLifetimeStart(
5805  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5806  llvm::Value *ElemPtr;
5807  // Each of the following arguments specifies the size of the corresponding
5808  // argument passed to the enqueued block.
5809  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5810  for (unsigned I = First; I < NumArgs; ++I) {
5811  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5812  auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5813  {Zero, Index});
5814  if (I == First)
5815  ElemPtr = GEP;
5816  auto *V =
5817  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5818  Builder.CreateAlignedStore(
5819  V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5820  }
5821  return std::tie(ElemPtr, TmpSize, TmpPtr);
5822  };
5823 
5824  // Could have events and/or varargs.
5825  if (E->getArg(3)->getType()->isBlockPointerType()) {
5826  // No events passed, but has variadic arguments.
5827  Name = "__enqueue_kernel_varargs";
5828  auto Info =
5829  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5830  llvm::Value *Kernel =
5831  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5832  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5833  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5834  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5835 
5836  // Create a vector of the arguments, as well as a constant value to
5837  // express to the runtime the number of variadic arguments.
5838  llvm::Value *const Args[] = {Queue, Flags,
5839  Range, Kernel,
5840  Block, ConstantInt::get(IntTy, NumArgs - 4),
5841  ElemPtr};
5842  llvm::Type *const ArgTys[] = {
5843  QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5844  GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5845 
5846  llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5847  auto Call = RValue::get(
5848  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5849  if (TmpSize)
5850  EmitLifetimeEnd(TmpSize, TmpPtr);
5851  return Call;
5852  }
5853  // Any remaining forms take event arguments.
5854  if (NumArgs >= 7) {
5855  llvm::PointerType *PtrTy = llvm::PointerType::get(
5856  CGM.getLLVMContext(),
5857  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5858 
5859  llvm::Value *NumEvents =
5860  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5861 
5862  // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5863  // to be a null pointer constant (including a `0` literal), we can take that
5864  // into account and emit a null pointer directly.
5865  llvm::Value *EventWaitList = nullptr;
5866  if (E->getArg(4)->isNullPointerConstant(
5867  getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5868  EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5869  } else {
5870  EventWaitList =
5871  E->getArg(4)->getType()->isArrayType()
5872  ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5873  : EmitScalarExpr(E->getArg(4));
5874  // Convert to generic address space.
5875  EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5876  }
5877  llvm::Value *EventRet = nullptr;
5878  if (E->getArg(5)->isNullPointerConstant(
5879  getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5880  EventRet = llvm::ConstantPointerNull::get(PtrTy);
5881  } else {
5882  EventRet =
5883  Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5884  }
5885 
5886  auto Info =
5887  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5888  llvm::Value *Kernel =
5889  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5890  llvm::Value *Block =
5891  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5892 
5893  std::vector<llvm::Type *> ArgTys = {
5894  QueueTy, Int32Ty, RangeTy, Int32Ty,
5895  PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5896 
5897  std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5898  NumEvents, EventWaitList, EventRet,
5899  Kernel, Block};
5900 
5901  if (NumArgs == 7) {
5902  // Has events but no variadics.
5903  Name = "__enqueue_kernel_basic_events";
5904  llvm::FunctionType *FTy = llvm::FunctionType::get(
5905  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5906  return RValue::get(
5907  EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5908  llvm::ArrayRef<llvm::Value *>(Args)));
5909  }
5910  // Has event info and variadics.
5911  // Pass the number of variadics to the runtime function too.
5912  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5913  ArgTys.push_back(Int32Ty);
5914  Name = "__enqueue_kernel_events_varargs";
5915 
5916  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5917  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5918  Args.push_back(ElemPtr);
5919  ArgTys.push_back(ElemPtr->getType());
5920 
5921  llvm::FunctionType *FTy = llvm::FunctionType::get(
5922  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5923  auto Call =
5924  RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5925  llvm::ArrayRef<llvm::Value *>(Args)));
5926  if (TmpSize)
5927  EmitLifetimeEnd(TmpSize, TmpPtr);
5928  return Call;
5929  }
5930  llvm_unreachable("Unexpected enqueue_kernel signature");
5931  }
5932  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5933  // parameter.
5934  case Builtin::BIget_kernel_work_group_size: {
5935  llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5936  getContext().getTargetAddressSpace(LangAS::opencl_generic));
5937  auto Info =
5938  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5939  Value *Kernel =
5940  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5941  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5942  return RValue::get(EmitRuntimeCall(
5943  CGM.CreateRuntimeFunction(
5944  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5945  false),
5946  "__get_kernel_work_group_size_impl"),
5947  {Kernel, Arg}));
5948  }
5949  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5950  llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5951  getContext().getTargetAddressSpace(LangAS::opencl_generic));
5952  auto Info =
5953  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5954  Value *Kernel =
5955  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5956  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5957  return RValue::get(EmitRuntimeCall(
5958  CGM.CreateRuntimeFunction(
5959  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5960  false),
5961  "__get_kernel_preferred_work_group_size_multiple_impl"),
5962  {Kernel, Arg}));
5963  }
5964  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5965  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5966  llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5967  getContext().getTargetAddressSpace(LangAS::opencl_generic));
5968  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5969  llvm::Value *NDRange = NDRangeL.getAddress(*this).emitRawPointer(*this);
5970  auto Info =
5971  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5972  Value *Kernel =
5973  Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5974  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5975  const char *Name =
5976  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5977  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5978  : "__get_kernel_sub_group_count_for_ndrange_impl";
5979  return RValue::get(EmitRuntimeCall(
5980  CGM.CreateRuntimeFunction(
5981  llvm::FunctionType::get(
5982  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5983  false),
5984  Name),
5985  {NDRange, Kernel, Block}));
5986  }
5987  case Builtin::BI__builtin_store_half:
5988  case Builtin::BI__builtin_store_halff: {
5989  Value *Val = EmitScalarExpr(E->getArg(0));
5990  Address Address = EmitPointerWithAlignment(E->getArg(1));
5991  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5992  Builder.CreateStore(HalfVal, Address);
5993  return RValue::get(nullptr);
5994  }
5995  case Builtin::BI__builtin_load_half: {
5996  Address Address = EmitPointerWithAlignment(E->getArg(0));
5997  Value *HalfVal = Builder.CreateLoad(Address);
5998  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5999  }
6000  case Builtin::BI__builtin_load_halff: {
6001  Address Address = EmitPointerWithAlignment(E->getArg(0));
6002  Value *HalfVal = Builder.CreateLoad(Address);
6003  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6004  }
6005  case Builtin::BI__builtin_printf:
6006  case Builtin::BIprintf:
6007  if (getTarget().getTriple().isNVPTX() ||
6008  getTarget().getTriple().isAMDGCN()) {
6009  if (getLangOpts().OpenMPIsTargetDevice)
6010  return EmitOpenMPDevicePrintfCallExpr(E);
6011  if (getTarget().getTriple().isNVPTX())
6012  return EmitNVPTXDevicePrintfCallExpr(E);
6013  if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
6014  return EmitAMDGPUDevicePrintfCallExpr(E);
6015  }
6016 
6017  break;
6018  case Builtin::BI__builtin_canonicalize:
6019  case Builtin::BI__builtin_canonicalizef:
6020  case Builtin::BI__builtin_canonicalizef16:
6021  case Builtin::BI__builtin_canonicalizel:
6022  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
6023 
6024  case Builtin::BI__builtin_thread_pointer: {
6025  if (!getContext().getTargetInfo().isTLSSupported())
6026  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6027  // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6028  break;
6029  }
6030  case Builtin::BI__builtin_os_log_format:
6031  return emitBuiltinOSLogFormat(*E);
6032 
6033  case Builtin::BI__xray_customevent: {
6034  if (!ShouldXRayInstrumentFunction())
6035  return RValue::getIgnored();
6036 
6037  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6038  XRayInstrKind::Custom))
6039  return RValue::getIgnored();
6040 
6041  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6042  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6043  return RValue::getIgnored();
6044 
6045  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6046  auto FTy = F->getFunctionType();
6047  auto Arg0 = E->getArg(0);
6048  auto Arg0Val = EmitScalarExpr(Arg0);
6049  auto Arg0Ty = Arg0->getType();
6050  auto PTy0 = FTy->getParamType(0);
6051  if (PTy0 != Arg0Val->getType()) {
6052  if (Arg0Ty->isArrayType())
6053  Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6054  else
6055  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6056  }
6057  auto Arg1 = EmitScalarExpr(E->getArg(1));
6058  auto PTy1 = FTy->getParamType(1);
6059  if (PTy1 != Arg1->getType())
6060  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6061  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6062  }
6063 
6064  case Builtin::BI__xray_typedevent: {
6065  // TODO: There should be a way to always emit events even if the current
6066  // function is not instrumented. Losing events in a stream can cripple
6067  // a trace.
6068  if (!ShouldXRayInstrumentFunction())
6069  return RValue::getIgnored();
6070 
6071  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6072  XRayInstrKind::Typed))
6073  return RValue::getIgnored();
6074 
6075  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6076  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6077  return RValue::getIgnored();
6078 
6079  Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6080  auto FTy = F->getFunctionType();
6081  auto Arg0 = EmitScalarExpr(E->getArg(0));
6082  auto PTy0 = FTy->getParamType(0);
6083  if (PTy0 != Arg0->getType())
6084  Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6085  auto Arg1 = E->getArg(1);
6086  auto Arg1Val = EmitScalarExpr(Arg1);
6087  auto Arg1Ty = Arg1->getType();
6088  auto PTy1 = FTy->getParamType(1);
6089  if (PTy1 != Arg1Val->getType()) {
6090  if (Arg1Ty->isArrayType())
6091  Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6092  else
6093  Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6094  }
6095  auto Arg2 = EmitScalarExpr(E->getArg(2));
6096  auto PTy2 = FTy->getParamType(2);
6097  if (PTy2 != Arg2->getType())
6098  Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6099  return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6100  }
6101 
6102  case Builtin::BI__builtin_ms_va_start:
6103  case Builtin::BI__builtin_ms_va_end:
6104  return RValue::get(
6105  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6106  BuiltinID == Builtin::BI__builtin_ms_va_start));
6107 
6108  case Builtin::BI__builtin_ms_va_copy: {
6109  // Lower this manually. We can't reliably determine whether or not any
6110  // given va_copy() is for a Win64 va_list from the calling convention
6111  // alone, because it's legal to do this from a System V ABI function.
6112  // With opaque pointer types, we won't have enough information in LLVM
6113  // IR to determine this from the argument types, either. Best to do it
6114  // now, while we have enough information.
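 // A minimal sketch of the resulting IR for __builtin_ms_va_copy(dst, src):
 //   %ap.val = load ptr, ptr %src
 //   store ptr %ap.val, ptr %dst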
6115  Address DestAddr = EmitMSVAListRef(E->getArg(0));
6116  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6117 
6118  DestAddr = DestAddr.withElementType(Int8PtrTy);
6119  SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6120 
6121  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6122  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6123  }
6124 
6125  // SYCL
6126  case Builtin::BI__builtin_intel_fpga_reg:
6127  return EmitIntelFPGARegBuiltin(E, ReturnValue);
6128  case Builtin::BI__builtin_intel_fpga_mem:
6129  return EmitIntelFPGAMemBuiltin(E);
6130  case Builtin::BI__builtin_intel_sycl_ptr_annotation:
6131  return EmitIntelSYCLPtrAnnotationBuiltin(E);
6132  case Builtin::BI__builtin_intel_sycl_alloca:
6133  case Builtin::BI__builtin_intel_sycl_alloca_with_align:
6134  return EmitIntelSYCLAllocaBuiltin(BuiltinID, E, ReturnValue);
6135  case Builtin::BI__builtin_get_device_side_mangled_name: {
6136  auto Name = CGM.getCUDARuntime().getDeviceSideName(
6137  cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6138  auto Str = CGM.GetAddrOfConstantCString(Name, "");
6139  llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
6140  llvm::ConstantInt::get(SizeTy, 0)};
6141  auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
6142  Str.getPointer(), Zeros);
6143  return RValue::get(Ptr);
6144  }
6145  }
6146 
6147  // If this is an alias for a lib function (e.g. __builtin_sin), emit
6148  // the call using the normal call path, but using the unmangled
6149  // version of the function name.
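 // For example, __builtin_sin(x) is emitted as an ordinary call to sin(x).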
6150  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6151  return emitLibraryCall(*this, FD, E,
6152  CGM.getBuiltinLibFunction(FD, BuiltinID));
6153 
6154  // If this is a predefined lib function (e.g. malloc), emit the call
6155  // using exactly the normal call path.
6156  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6157  return emitLibraryCall(
6158  *this, FD, E, cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
6159 
6160  // Check that a call to a target specific builtin has the correct target
6161  // features.
6162  // This is down here to avoid running it for non-target-specific builtins;
6163  // however, if generic builtins start to require generic target features,
6164  // then we can move this up to the beginning of the function.
6165  checkTargetFeatures(E, FD);
6166 
6167  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6168  LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6169 
6170  // See if we have a target specific intrinsic.
6171  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6172  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6173  StringRef Prefix =
6174  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6175  if (!Prefix.empty()) {
6176  IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6177  // NOTE: we don't need to perform a compatibility flag check here since the
6178  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
6179  // the MS builtins via ALL_MS_LANGUAGES, so they are handled earlier.
6180  if (IntrinsicID == Intrinsic::not_intrinsic)
6181  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6182  }
6183 
6184  if (IntrinsicID != Intrinsic::not_intrinsic) {
6185  SmallVector<Value *, 16> Args;
6186 
6187  // Find out if any arguments are required to be integer constant
6188  // expressions.
6189  unsigned ICEArguments = 0;
6190  ASTContext::GetBuiltinTypeError Error;
6191  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6192  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6193 
6194  Function *F = CGM.getIntrinsic(IntrinsicID);
6195  llvm::FunctionType *FTy = F->getFunctionType();
6196 
6197  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6198  Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6199  // If the intrinsic arg type is different from the builtin arg type,
6200  // we need to do a bitcast.
6201  llvm::Type *PTy = FTy->getParamType(i);
6202  if (PTy != ArgValue->getType()) {
6203  // XXX - vector of pointers?
6204  if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6205  if (PtrTy->getAddressSpace() !=
6206  ArgValue->getType()->getPointerAddressSpace()) {
6207  ArgValue = Builder.CreateAddrSpaceCast(
6208  ArgValue, llvm::PointerType::get(getLLVMContext(),
6209  PtrTy->getAddressSpace()));
6210  }
6211  }
6212 
6213  // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
6214  // in AMX intrinsics.
6215  if (PTy->isX86_AMXTy())
6216  ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6217  {ArgValue->getType()}, {ArgValue});
6218  else
6219  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6220  }
6221 
6222  Args.push_back(ArgValue);
6223  }
6224 
6225  Value *V = Builder.CreateCall(F, Args);
6226  QualType BuiltinRetType = E->getType();
6227 
6228  llvm::Type *RetTy = VoidTy;
6229  if (!BuiltinRetType->isVoidType())
6230  RetTy = ConvertType(BuiltinRetType);
6231 
6232  if (RetTy != V->getType()) {
6233  // XXX - vector of pointers?
6234  if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6235  if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6236  V = Builder.CreateAddrSpaceCast(
6237  V, llvm::PointerType::get(getLLVMContext(),
6238  PtrTy->getAddressSpace()));
6239  }
6240  }
6241 
6242  // Cast x86_amx to a vector type (e.g., v256i32); this only happens
6243  // in AMX intrinsics.
6244  if (V->getType()->isX86_AMXTy())
6245  V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6246  {V});
6247  else
6248  V = Builder.CreateBitCast(V, RetTy);
6249  }
6250 
6251  if (RetTy->isVoidTy())
6252  return RValue::get(nullptr);
6253 
6254  return RValue::get(V);
6255  }
6256 
6257  // Some target-specific builtins can have aggregate return values, e.g.
6258  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6259  // ReturnValue to be non-null, so that the target-specific emission code can
6260  // always just emit into it.
6261  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6262  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6263  Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6264  ReturnValue = ReturnValueSlot(DestPtr, false);
6265  }
6266 
6267  // Now see if we can emit a target-specific builtin.
6268  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6269  switch (EvalKind) {
6270  case TEK_Scalar:
6271  if (V->getType()->isVoidTy())
6272  return RValue::get(nullptr);
6273  return RValue::get(V);
6274  case TEK_Aggregate:
6275  return RValue::getAggregate(ReturnValue.getAddress(),
6276  ReturnValue.isVolatile());
6277  case TEK_Complex:
6278  llvm_unreachable("No current target builtin returns complex");
6279  }
6280  llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6281  }
6282 
6283  // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6284  if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6285  return RValue::get(V);
6286 
6287  if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6288  return EmitHipStdParUnsupportedBuiltin(this, FD);
6289 
6290  ErrorUnsupported(E, "builtin function");
6291 
6292  // Unknown builtin: report it as unsupported (above) and return undef.
6293  return GetUndefRValue(E->getType());
6294 }
6295 
6296 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6297  unsigned BuiltinID, const CallExpr *E,
6298  ReturnValueSlot ReturnValue,
6299  llvm::Triple::ArchType Arch) {
6300  // When compiling in HipStdPar mode, we have to be conservative in rejecting
6301  // target-specific features in the FE, and defer the possible error to the
6302  // AcceleratorCodeSelection pass: there, an error is emitted iff an
6303  // unsupported target builtin is referenced by an accelerator-executable
6304  // function. Returning nullptr here leads to the builtin being handled in
6305  // EmitHipStdParUnsupportedBuiltin.
6306  if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6307  Arch != CGF->getTarget().getTriple().getArch())
6308  return nullptr;
6309 
6310  switch (Arch) {
6311  case llvm::Triple::arm:
6312  case llvm::Triple::armeb:
6313  case llvm::Triple::thumb:
6314  case llvm::Triple::thumbeb:
6315  return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6316  case llvm::Triple::aarch64:
6317  case llvm::Triple::aarch64_32:
6318  case llvm::Triple::aarch64_be:
6319  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6320  case llvm::Triple::bpfeb:
6321  case llvm::Triple::bpfel:
6322  return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6323  case llvm::Triple::x86:
6324  case llvm::Triple::x86_64:
6325  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6326  case llvm::Triple::ppc:
6327  case llvm::Triple::ppcle:
6328  case llvm::Triple::ppc64:
6329  case llvm::Triple::ppc64le:
6330  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6331  case llvm::Triple::r600:
6332  case llvm::Triple::amdgcn:
6333  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6334  case llvm::Triple::systemz:
6335  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6336  case llvm::Triple::nvptx:
6337  case llvm::Triple::nvptx64:
6338  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6339  case llvm::Triple::wasm32:
6340  case llvm::Triple::wasm64:
6341  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6342  case llvm::Triple::hexagon:
6343  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6344  case llvm::Triple::riscv32:
6345  case llvm::Triple::riscv64:
6346  return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6347  default:
6348  return nullptr;
6349  }
6350 }
6351 
6352 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6353  const CallExpr *E,
6354  ReturnValueSlot ReturnValue) {
6355  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6356  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6357  return EmitTargetArchBuiltinExpr(
6358  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6359  ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6360  }
6361 
6362  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6363  getTarget().getTriple().getArch());
6364 }
6365 
6366 static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6367  NeonTypeFlags TypeFlags,
6368  bool HasLegalHalfType = true,
6369  bool V1Ty = false,
6370  bool AllowBFloatArgsAndRet = true) {
6371  int IsQuad = TypeFlags.isQuad();
6372  switch (TypeFlags.getEltType()) {
6373  case NeonTypeFlags::Int8:
6374  case NeonTypeFlags::Poly8:
6375  return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6376  case NeonTypeFlags::Int16:
6377  case NeonTypeFlags::Poly16:
6378  return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6379  case NeonTypeFlags::BFloat16:
6380  if (AllowBFloatArgsAndRet)
6381  return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6382  else
6383  return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6384  case NeonTypeFlags::Float16:
6385  if (HasLegalHalfType)
6386  return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6387  else
6388  return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6389  case NeonTypeFlags::Int32:
6390  return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6391  case NeonTypeFlags::Int64:
6392  case NeonTypeFlags::Poly64:
6393  return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6394  case NeonTypeFlags::Poly128:
6395  // FIXME: i128 and f128 aren't fully supported in Clang and LLVM;
6396  // a lot of the i128 and f128 API is missing,
6397  // so we use v16i8 to represent poly128 and get it pattern matched.
6398  return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6399  case NeonTypeFlags::Float32:
6400  return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6401  case NeonTypeFlags::Float64:
6402  return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6403  }
6404  llvm_unreachable("Unknown vector element type!");
6405 }
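// For example, NeonTypeFlags with element type Int32 yields <2 x i32> for the
// 64-bit (non-quad) form and <4 x i32> for the 128-bit (quad) form.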
6406 
6407 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6408  NeonTypeFlags IntTypeFlags) {
6409  int IsQuad = IntTypeFlags.isQuad();
6410  switch (IntTypeFlags.getEltType()) {
6411  case NeonTypeFlags::Int16:
6412  return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6413  case NeonTypeFlags::Int32:
6414  return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6415  case NeonTypeFlags::Int64:
6416  return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6417  default:
6418  llvm_unreachable("Type can't be converted to floating-point!");
6419  }
6420 }
6421 
6422 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6423  const ElementCount &Count) {
6424  Value *SV = llvm::ConstantVector::getSplat(Count, C);
6425  return Builder.CreateShuffleVector(V, V, SV, "lane");
6426 }
6427 
6428 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6429  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6430  return EmitNeonSplat(V, C, EC);
6431 }
6432 
6433 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
6434  const char *name,
6435  unsigned shift, bool rightshift) {
6436  unsigned j = 0;
6437  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6438  ai != ae; ++ai, ++j) {
6439  if (F->isConstrainedFPIntrinsic())
6440  if (ai->getType()->isMetadataTy())
6441  continue;
6442  if (shift > 0 && shift == j)
6443  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6444  else
6445  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6446  }
6447 
6448  if (F->isConstrainedFPIntrinsic())
6449  return Builder.CreateConstrainedFPCall(F, Ops, name);
6450  else
6451  return Builder.CreateCall(F, Ops, name);
6452 }
6453 
6454 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6455  bool neg) {
6456  int SV = cast<ConstantInt>(V)->getSExtValue();
6457  return ConstantInt::get(Ty, neg ? -SV : SV);
6458 }
6459 
6460 // Right-shift a vector by a constant.
6461 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6462  llvm::Type *Ty, bool usgn,
6463  const char *name) {
6464  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6465 
6466  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6467  int EltSize = VTy->getScalarSizeInBits();
6468 
6469  Vec = Builder.CreateBitCast(Vec, Ty);
6470 
6471  // lshr/ashr are undefined when the shift amount is equal to the vector
6472  // element size.
6473  if (ShiftAmt == EltSize) {
6474  if (usgn) {
6475  // Right-shifting an unsigned value by its size yields 0.
6476  return llvm::ConstantAggregateZero::get(VTy);
6477  } else {
6478  // Right-shifting a signed value by its size is equivalent
6479  // to a shift of size-1.
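 // For example, with i8 elements an arithmetic shift by 8 becomes a
 // shift by 7, which still yields 0 or -1 according to the sign bit.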
6480  --ShiftAmt;
6481  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6482  }
6483  }
6484 
6485  Shift = EmitNeonShiftVector(Shift, Ty, false);
6486  if (usgn)
6487  return Builder.CreateLShr(Vec, Shift, name);
6488  else
6489  return Builder.CreateAShr(Vec, Shift, name);
6490 }
6491 
6492 enum {
6493  AddRetType = (1 << 0),
6494  Add1ArgType = (1 << 1),
6495  Add2ArgTypes = (1 << 2),
6496 
6497  VectorizeRetType = (1 << 3),
6498  VectorizeArgTypes = (1 << 4),
6499 
6500  InventFloatType = (1 << 5),
6501  UnsignedAlts = (1 << 6),
6502 
6503  Use64BitVectors = (1 << 7),
6504  Use128BitVectors = (1 << 8),
6505 
6506  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6507  VectorRet = AddRetType | VectorizeRetType,
6508  VectorRetGetArgs01 =
6509  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6510  FpCmpzModifiers =
6511  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6512 };
6513 
6514 namespace {
6515 struct ARMVectorIntrinsicInfo {
6516  const char *NameHint;
6517  unsigned BuiltinID;
6518  unsigned LLVMIntrinsic;
6519  unsigned AltLLVMIntrinsic;
6520  unsigned TypeModifier;
6521 
6522  bool operator<(unsigned RHSBuiltinID) const {
6523  return BuiltinID < RHSBuiltinID;
6524  }
6525  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6526  return BuiltinID < TE.BuiltinID;
6527  }
6528 };
6529 } // end anonymous namespace
6530 
6531 #define NEONMAP0(NameBase) \
6532  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6533 
6534 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6535  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6536  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6537 
6538 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6539  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6540  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6541  TypeModifier }
6542 
6543 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6544  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6545  NEONMAP0(splat_lane_v),
6546  NEONMAP0(splat_laneq_v),
6547  NEONMAP0(splatq_lane_v),
6548  NEONMAP0(splatq_laneq_v),
6549  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6550  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6551  NEONMAP1(vabs_v, arm_neon_vabs, 0),
6552  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6553  NEONMAP0(vadd_v),
6554  NEONMAP0(vaddhn_v),
6555  NEONMAP0(vaddq_v),
6556  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6557  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6558  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6559  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6560  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6561  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6562  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6563  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6564  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6565  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6566  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6567  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6568  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6569  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6570  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6571  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6572  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6573  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6574  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6575  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6576  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6577  NEONMAP1(vcage_v, arm_neon_vacge, 0),
6578  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6579  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6580  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6581  NEONMAP1(vcale_v, arm_neon_vacge, 0),
6582  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6583  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6584  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6585  NEONMAP0(vceqz_v),
6586  NEONMAP0(vceqzq_v),
6587  NEONMAP0(vcgez_v),
6588  NEONMAP0(vcgezq_v),
6589  NEONMAP0(vcgtz_v),
6590  NEONMAP0(vcgtzq_v),
6591  NEONMAP0(vclez_v),
6592  NEONMAP0(vclezq_v),
6593  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6594  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6595  NEONMAP0(vcltz_v),
6596  NEONMAP0(vcltzq_v),
6597  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6598  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6599  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6600  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6601  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6602  NEONMAP0(vcvt_f16_s16),
6603  NEONMAP0(vcvt_f16_u16),
6604  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6605  NEONMAP0(vcvt_f32_v),
6606  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6607  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6608  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6609  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6610  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6611  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6612  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6613  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6614  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6615  NEONMAP0(vcvt_s16_f16),
6616  NEONMAP0(vcvt_s32_v),
6617  NEONMAP0(vcvt_s64_v),
6618  NEONMAP0(vcvt_u16_f16),
6619  NEONMAP0(vcvt_u32_v),
6620  NEONMAP0(vcvt_u64_v),
6621  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6622  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6623  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6624  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6625  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6626  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6627  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6628  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6629  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6630  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6631  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6632  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6633  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6634  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6635  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6636  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6637  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6638  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6639  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6640  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6641  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6642  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6643  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6644  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6645  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6646  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6647  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6648  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6649  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6650  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6651  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6652  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6653  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6654  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6655  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6656  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6657  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6658  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6659  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6660  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6661  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6662  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6663  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6664  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6665  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6666  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6667  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6668  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6669  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6670  NEONMAP0(vcvtq_f16_s16),
6671  NEONMAP0(vcvtq_f16_u16),
6672  NEONMAP0(vcvtq_f32_v),
6673  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6674  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6675  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6676  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6677  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6678  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6679  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6680  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6681  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6682  NEONMAP0(vcvtq_s16_f16),
6683  NEONMAP0(vcvtq_s32_v),
6684  NEONMAP0(vcvtq_s64_v),
6685  NEONMAP0(vcvtq_u16_f16),
6686  NEONMAP0(vcvtq_u32_v),
6687  NEONMAP0(vcvtq_u64_v),
6688  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6689  NEONMAP1(vdot_u32, arm_neon_udot, 0),
6690  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6691  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6692  NEONMAP0(vext_v),
6693  NEONMAP0(vextq_v),
6694  NEONMAP0(vfma_v),
6695  NEONMAP0(vfmaq_v),
6696  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6697  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6698  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6699  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6700  NEONMAP0(vld1_dup_v),
6701  NEONMAP1(vld1_v, arm_neon_vld1, 0),
6702  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6703  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6704  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6705  NEONMAP0(vld1q_dup_v),
6706  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6707  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6708  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6709  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6710  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6711  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6712  NEONMAP1(vld2_v, arm_neon_vld2, 0),
6713  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6714  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6715  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6716  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6717  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6718  NEONMAP1(vld3_v, arm_neon_vld3, 0),
6719  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6720  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6721  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6722  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6723  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6724  NEONMAP1(vld4_v, arm_neon_vld4, 0),
6725  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6726  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6727  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6728  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6729  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6730  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6731  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6732  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6733  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6734  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6735  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6736  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6737  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6738  NEONMAP0(vmovl_v),
6739  NEONMAP0(vmovn_v),
6740  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6741  NEONMAP0(vmull_v),
6742  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6743  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6744  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6745  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6746  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6747  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6748  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6749  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6750  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6751  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6752  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6753  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6754  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6755  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6756  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6757  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6758  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6759  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6760  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6761  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6762  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6763  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6764  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6765  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6766  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6767  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6768  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6769  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6770  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6771  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6772  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6773  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6774  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6775  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6776  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6777  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6778  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6779  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6780  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6781  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6782  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6783  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6784  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6785  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6786  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6787  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6788  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6789  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6790  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6791  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6792  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6793  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6794  NEONMAP0(vrndi_v),
6795  NEONMAP0(vrndiq_v),
6796  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6797  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6798  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6799  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6800  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6801  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6802  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6803  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6804  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6805  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6806  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6807  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6808  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6809  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6810  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6811  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6812  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6813  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6814  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6815  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6816  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6817  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6818  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6819  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6820  NEONMAP0(vshl_n_v),
6821  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6822  NEONMAP0(vshll_n_v),
6823  NEONMAP0(vshlq_n_v),
6824  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6825  NEONMAP0(vshr_n_v),
6826  NEONMAP0(vshrn_n_v),
6827  NEONMAP0(vshrq_n_v),
6828  NEONMAP1(vst1_v, arm_neon_vst1, 0),
6829  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6830  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6831  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6832  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6833  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6834  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6835  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6836  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6837  NEONMAP1(vst2_v, arm_neon_vst2, 0),
6838  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6839  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6840  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6841  NEONMAP1(vst3_v, arm_neon_vst3, 0),
6842  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6843  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6844  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6845  NEONMAP1(vst4_v, arm_neon_vst4, 0),
6846  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6847  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6848  NEONMAP0(vsubhn_v),
6849  NEONMAP0(vtrn_v),
6850  NEONMAP0(vtrnq_v),
6851  NEONMAP0(vtst_v),
6852  NEONMAP0(vtstq_v),
6853  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6854  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6855  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6856  NEONMAP0(vuzp_v),
6857  NEONMAP0(vuzpq_v),
6858  NEONMAP0(vzip_v),
6859  NEONMAP0(vzipq_v)
6860 };
6861 
6862 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6863  NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6864  NEONMAP0(splat_lane_v),
6865  NEONMAP0(splat_laneq_v),
6866  NEONMAP0(splatq_lane_v),
6867  NEONMAP0(splatq_laneq_v),
6868  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6869  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6870  NEONMAP0(vadd_v),
6871  NEONMAP0(vaddhn_v),
6872  NEONMAP0(vaddq_p128),
6873  NEONMAP0(vaddq_v),
6874  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6875  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6876  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6877  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6878  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6879  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6880  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6881  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6882  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6883  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6884  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6885  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6886  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6887  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6888  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6889  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6890  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6891  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6892  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6893  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6894  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6895  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6896  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6897  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6898  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6899  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6900  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6901  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6902  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6903  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6904  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6905  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6906  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6907  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6908  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6909  NEONMAP0(vceqz_v),
6910  NEONMAP0(vceqzq_v),
6911  NEONMAP0(vcgez_v),
6912  NEONMAP0(vcgezq_v),
6913  NEONMAP0(vcgtz_v),
6914  NEONMAP0(vcgtzq_v),
6915  NEONMAP0(vclez_v),
6916  NEONMAP0(vclezq_v),
6917  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6918  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6919  NEONMAP0(vcltz_v),
6920  NEONMAP0(vcltzq_v),
6921  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6922  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6923  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6924  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6925  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6926  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6927  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6928  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6929  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6930  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6931  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6932  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6933  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6934  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6935  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6936  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6937  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6938  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6939  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6940  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6941  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6942  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6943  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6944  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6945  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6946  NEONMAP0(vcvt_f16_s16),
6947  NEONMAP0(vcvt_f16_u16),
6948  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6949  NEONMAP0(vcvt_f32_v),
6950  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6951  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6952  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6953  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6954  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6955  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6956  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6957  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6958  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6959  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6960  NEONMAP0(vcvtq_f16_s16),
6961  NEONMAP0(vcvtq_f16_u16),
6962  NEONMAP0(vcvtq_f32_v),
6963  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6964  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6965  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6966  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6967  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6968  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6969  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6970  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6971  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6972  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6973  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6974  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6975  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6976  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6977  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6978  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6979  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6980  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6981  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6982  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6983  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6984  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6985  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6986  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6987  NEONMAP0(vext_v),
6988  NEONMAP0(vextq_v),
6989  NEONMAP0(vfma_v),
6990  NEONMAP0(vfmaq_v),
6991  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6992  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6993  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6994  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6995  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6996  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6997  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6998  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6999  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7000  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7001  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7002  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7003  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7004  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7005  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7006  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7007  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7008  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7009  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7010  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7011  NEONMAP0(vmovl_v),
7012  NEONMAP0(vmovn_v),
7013  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7014  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7015  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7016  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7017  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7018  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7019  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7020  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7021  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7022  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7023  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7024  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7025  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7026  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7027  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7028  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7029  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7030  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7031  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7032  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7033  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7034  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7035  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7036  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7037  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7038  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7039  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7040  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7041  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7042  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7043  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7044  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7045  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7046  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7047  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7048  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7049  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7050  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7051  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7052  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7053  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7054  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7055  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7056  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7057  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7058  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7059  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7060  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7061  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7062  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7063  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7064  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7065  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7066  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7067  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7068  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7069  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7070  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7071  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7072  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7073  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7074  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7075  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7076  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7077  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7078  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7079  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7080  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7081  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7082  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7083  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7084  NEONMAP0(vrndi_v),
7085  NEONMAP0(vrndiq_v),
7086  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7087  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7088  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7089  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7090  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7091  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7092  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7093  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7094  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7095  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7096  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7097  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7098  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7099  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7100  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7101  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7102  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7103  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7104  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7105  NEONMAP0(vshl_n_v),
7106  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7107  NEONMAP0(vshll_n_v),
7108  NEONMAP0(vshlq_n_v),
7109  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7110  NEONMAP0(vshr_n_v),
7111  NEONMAP0(vshrn_n_v),
7112  NEONMAP0(vshrq_n_v),
7113  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7114  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7115  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7116  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7117  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7118  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7119  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7120  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7121  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7122  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7123  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7124  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7125  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7126  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7127  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7128  NEONMAP0(vsubhn_v),
7129  NEONMAP0(vtst_v),
7130  NEONMAP0(vtstq_v),
7131  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7132  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7133  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7134  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7135 };
7136 
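// Scalar ("SISD") intrinsics operate on a single lane. Many are routed
// through the corresponding vector intrinsics: the Vectorize1ArgType /
// VectorRet and Use64BitVectors / Use128BitVectors modifiers below describe
// how scalar operands are widened to vectors and how the result lane is
// extracted again afterwards.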
7137 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7138  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7139  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7140  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7141  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7142  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7143  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7144  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7145  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7146  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7147  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7148  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7149  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7150  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7151  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7152  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7153  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7154  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7155  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7156  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7157  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7158  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7159  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7160  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7161  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7162  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7163  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7164  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7165  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7166  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7167  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7168  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7169  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7170  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7171  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7172  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7173  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7174  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7175  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7176  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7177  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7178  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7179  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7180  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7181  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7182  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7183  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7184  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7185  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7186  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7187  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7188  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7189  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7190  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7191  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7192  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7193  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7194  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7195  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7196  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7197  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7198  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7199  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7200  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7201  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7202  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7203  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7204  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7205  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7206  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7207  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7208  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7209  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7210  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7211  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7212  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7213  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7214  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7215  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7216  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7217  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7218  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7219  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7220  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7221  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7222  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7223  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7224  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7225  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7226  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7227  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7228  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7229  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7230  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7231  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7232  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7233  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7234  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7235  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7236  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7237  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7238  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7239  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7240  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7241  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7242  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7243  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7244  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7245  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7246  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7247  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7248  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7249  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7250  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7251  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7252  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7253  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7254  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7255  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7256  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7257  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7258  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7259  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7260  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7261  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7262  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7263  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7264  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7265  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7266  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7267  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7268  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7269  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7270  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7271  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7272  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7273  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7274  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7275  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7276  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7277  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7278  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7279  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7280  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7281  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7282  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7283  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7284  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7285  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7286  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7287  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7288  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7289  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7290  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7291  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7292  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7293  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7294  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7295  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7296  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7297  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7298  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7299  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7300  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7301  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7302  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7303  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7304  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7305  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7306  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7307  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7308  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7309  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7310  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7311  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7312  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7313  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7314  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7315  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7316  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7317  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7318  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7319  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7320  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7321  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7322  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7323  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7324  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7325  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7326  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7327  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7328  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7329  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7330  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7331  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7332  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7333  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7334  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7335  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7336  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7337  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7338  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7339  // FP16 scalar intrinsics go here.
7340  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7341  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7342  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7343  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7344  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7345  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7346  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7347  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7348  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7349  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7350  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7351  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7352  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7353  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7354  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7355  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7356  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7357  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7358  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7359  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7360  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7361  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7362  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7363  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7364  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7365  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7366  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7367  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7368  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7369  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7370  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7371  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7372  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7373  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7374 };
7375 
7376 // Some intrinsics are equivalent for codegen.
7377 static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7378  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7379  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7380  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7381  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7382  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7383  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7384  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7385  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7386  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7387  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7388  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7389  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7390  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7391  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7392  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7393  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7394  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7395  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7396  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7397  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7398  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7399  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7400  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7401  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7402  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7403  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7404  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7405  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7406  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7407  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7408  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7409  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7410  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7411  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7412  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7413  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7414  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7415  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7416  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7417  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7418  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7419  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7420  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7421  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7422  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7423  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7424  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7425  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7426  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7427  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7428  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7429  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7430  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7431  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7432  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7433  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7434  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7435  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7436  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7437  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7438  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7439  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7440  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7441  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7442  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7443  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7444  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7445  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7446  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7447  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7448  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7449  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7450  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7451  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7452  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7453  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7454  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7455  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7456  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7457  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7458  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7459  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7460  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7461  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7462  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7463  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7464  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7465  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7466  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7467  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7468  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7469  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7470  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7471  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7472  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7473  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7474  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7475  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7476  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7477  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7478  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7479  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7480  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7481  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7482  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7483  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7484  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7485  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7486  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7487  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7488  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7489  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7490  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7491  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7492  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7493  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7494  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7495  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7496  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7497  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7498  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7499  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7500  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7501  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7502  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7503  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7504  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7505  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7506  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7507  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7508  // arbitrary one to be handled as the canonical variation.
7509  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7510  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7511  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7512  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7513  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7514  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7515  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7516  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7517  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7518  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7519  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7520  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7521 };
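// This equivalence table is consulted before the intrinsic-map lookup: an
// aliased builtin ID is first rewritten to the canonical ID in the right-hand
// column, so only that variant needs entries in the maps above.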
7522 
7523 #undef NEONMAP0
7524 #undef NEONMAP1
7525 #undef NEONMAP2
7526 
7527 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7528  { \
7529  #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7530  TypeModifier \
7531  }
7532 
7533 #define SVEMAP2(NameBase, TypeModifier) \
7534  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
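// The entries below are generated by TableGen into the included .inc file;
// the GET_SVE_LLVM_INTRINSIC_MAP guard selects the builtin-to-LLVM-intrinsic
// mapping section of that file.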
7535 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7536 #define GET_SVE_LLVM_INTRINSIC_MAP
7537 #include "clang/Basic/arm_sve_builtin_cg.inc"
7538 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7539 #undef GET_SVE_LLVM_INTRINSIC_MAP
7540 };
7541 
7542 #undef SVEMAP1
7543 #undef SVEMAP2
7544 
7545 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7546  { \
7547  #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7548  TypeModifier \
7549  }
7550 
7551 #define SMEMAP2(NameBase, TypeModifier) \
7552  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7553 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7554 #define GET_SME_LLVM_INTRINSIC_MAP
7555 #include "clang/Basic/arm_sme_builtin_cg.inc"
7556 #undef GET_SME_LLVM_INTRINSIC_MAP
7557 };
7558 
7559 #undef SMEMAP1
7560 #undef SMEMAP2
7561 
7562 static bool NEONSIMDIntrinsicsProvenSorted = false;
7563 
7564 static bool AArch64SIMDIntrinsicsProvenSorted = false;
7565 static bool AArch64SISDIntrinsicsProvenSorted = false;
7566 static bool AArch64SVEIntrinsicsProvenSorted = false;
7567 static bool AArch64SMEIntrinsicsProvenSorted = false;
7568 
7569 static const ARMVectorIntrinsicInfo *
7570 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7571  unsigned BuiltinID, bool &MapProvenSorted) {
7572 
7573 #ifndef NDEBUG
7574  if (!MapProvenSorted) {
7575  assert(llvm::is_sorted(IntrinsicMap));
7576  MapProvenSorted = true;
7577  }
7578 #endif
7579 
7580  const ARMVectorIntrinsicInfo *Builtin =
7581  llvm::lower_bound(IntrinsicMap, BuiltinID);
7582 
7583  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7584  return Builtin;
7585 
7586  return nullptr;
7587 }
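// A sketch of typical use: each intrinsic map is paired with one of the
// static "proven sorted" flags above, so the sortedness assertion runs at
// most once per map in asserts builds, e.g.
//   const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted);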
7588 
7589 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7590  unsigned Modifier,
7591  llvm::Type *ArgType,
7592  const CallExpr *E) {
7593  int VectorSize = 0;
7594  if (Modifier & Use64BitVectors)
7595  VectorSize = 64;
7596  else if (Modifier & Use128BitVectors)
7597  VectorSize = 128;
7598 
7599  // Return type.
7600  SmallVector<llvm::Type *, 3> Tys;
7601  if (Modifier & AddRetType) {
7602  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7603  if (Modifier & VectorizeRetType)
7604  Ty = llvm::FixedVectorType::get(
7605  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7606 
7607  Tys.push_back(Ty);
7608  }
7609 
7610  // Arguments.
7611  if (Modifier & VectorizeArgTypes) {
7612  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7613  ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7614  }
7615 
7616  if (Modifier & (Add1ArgType | Add2ArgTypes))
7617  Tys.push_back(ArgType);
7618 
7619  if (Modifier & Add2ArgTypes)
7620  Tys.push_back(ArgType);
7621 
7622  if (Modifier & InventFloatType)
7623  Tys.push_back(FloatTy);
7624 
7625  return CGM.getIntrinsic(IntrinsicID, Tys);
7626 }
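// Worked example: vaddlv_s32 is mapped with AddRetType | Add1ArgType, so Tys
// becomes { i64, <2 x i32> } and the lookup resolves to the overload
// llvm.aarch64.neon.saddlv.i64.v2i32.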
7627 
7628 static Value *EmitCommonNeonSISDBuiltinExpr(
7629  CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7630  SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7631  unsigned BuiltinID = SISDInfo.BuiltinID;
7632  unsigned int Int = SISDInfo.LLVMIntrinsic;
7633  unsigned Modifier = SISDInfo.TypeModifier;
7634  const char *s = SISDInfo.NameHint;
7635 
7636  switch (BuiltinID) {
7637  case NEON::BI__builtin_neon_vcled_s64:
7638  case NEON::BI__builtin_neon_vcled_u64:
7639  case NEON::BI__builtin_neon_vcles_f32:
7640  case NEON::BI__builtin_neon_vcled_f64:
7641  case NEON::BI__builtin_neon_vcltd_s64:
7642  case NEON::BI__builtin_neon_vcltd_u64:
7643  case NEON::BI__builtin_neon_vclts_f32:
7644  case NEON::BI__builtin_neon_vcltd_f64:
7645  case NEON::BI__builtin_neon_vcales_f32:
7646  case NEON::BI__builtin_neon_vcaled_f64:
7647  case NEON::BI__builtin_neon_vcalts_f32:
7648  case NEON::BI__builtin_neon_vcaltd_f64:
7649  // Only one direction of comparisons actually exists: cmle is actually a cmge
7650  // with swapped operands. The table gives us the right intrinsic but we
7651  // still need to do the swap.
7652  std::swap(Ops[0], Ops[1]);
7653  break;
7654  }
7655 
7656  assert(Int && "Generic code assumes a valid intrinsic");
7657 
7658  // Determine the type(s) of this overloaded AArch64 intrinsic.
7659  const Expr *Arg = E->getArg(0);
7660  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7661  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7662 
7663  int j = 0;
7664  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7665  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7666  ai != ae; ++ai, ++j) {
7667  llvm::Type *ArgTy = ai->getType();
7668  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7669  ArgTy->getPrimitiveSizeInBits())
7670  continue;
7671 
7672  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7673  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7674  // it before inserting.
7675  Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7676  Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7677  Ops[j] =
7678  CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7679  }
7680 
7681  Value *Result = CGF.EmitNeonCall(F, Ops, s);
7682  llvm::Type *ResultType = CGF.ConvertType(E->getType());
7683  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7684  Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7685  return CGF.Builder.CreateExtractElement(Result, C0);
7686 
7687  return CGF.Builder.CreateBitCast(Result, ResultType, s);
7688 }
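// Worked example: vqaddb_s8 (Vectorize1ArgType | Use64BitVectors) has each i8
// operand inserted into lane 0 of an <8 x i8> poison vector, calls
// llvm.aarch64.neon.sqadd.v8i8, and extracts lane 0 as the scalar i8 result.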
7689 
7690 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7691  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7692  const char *NameHint, unsigned Modifier, const CallExpr *E,
7693  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7694  llvm::Triple::ArchType Arch) {
7695  // Get the last argument, which specifies the vector type.
7696  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7697  std::optional<llvm::APSInt> NeonTypeConst =
7698  Arg->getIntegerConstantExpr(getContext());
7699  if (!NeonTypeConst)
7700  return nullptr;
7701 
7702  // Determine the type of this overloaded NEON intrinsic.
7703  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7704  bool Usgn = Type.isUnsigned();
7705  bool Quad = Type.isQuad();
7706  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7707  const bool AllowBFloatArgsAndRet =
7708  getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7709 
7710  llvm::FixedVectorType *VTy =
7711  GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7712  llvm::Type *Ty = VTy;
7713  if (!Ty)
7714  return nullptr;
7715 
7716  auto getAlignmentValue32 = [&](Address addr) -> Value* {
7717  return Builder.getInt32(addr.getAlignment().getQuantity());
7718  };
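  // Helper used by load/store cases that pass the pointer's alignment to the
  // intrinsic as an explicit i32 argument.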
7719 
7720  unsigned Int = LLVMIntrinsic;
7721  if ((Modifier & UnsignedAlts) && !Usgn)
7722  Int = AltLLVMIntrinsic;
7723 
7724  switch (BuiltinID) {
7725  default: break;
7726  case NEON::BI__builtin_neon_splat_lane_v:
7727  case NEON::BI__builtin_neon_splat_laneq_v:
7728  case NEON::BI__builtin_neon_splatq_lane_v:
7729  case NEON::BI__builtin_neon_splatq_laneq_v: {
7730  auto NumElements = VTy->getElementCount();
7731  if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7732  NumElements = NumElements * 2;
7733  if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7734  NumElements = NumElements.divideCoefficientBy(2);
7735 
7736  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7737  return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7738  }
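  // Example: splat_lane_v on <2 x i32> with lane index 1 emits a
  // shufflevector with mask <1, 1>, duplicating that lane across the result.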
7739  case NEON::BI__builtin_neon_vpadd_v:
7740  case NEON::BI__builtin_neon_vpaddq_v:
7741  // We don't allow fp/int overloading of intrinsics.
7742  if (VTy->getElementType()->isFloatingPointTy() &&
7743  Int == Intrinsic::aarch64_neon_addp)
7744  Int = Intrinsic::aarch64_neon_faddp;
7745  break;
7746  case NEON::BI__builtin_neon_vabs_v:
7747  case NEON::BI__builtin_neon_vabsq_v:
7748  if (VTy->getElementType()->isFloatingPointTy())
7749  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7750  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7751  case NEON::BI__builtin_neon_vadd_v:
7752  case NEON::BI__builtin_neon_vaddq_v: {
7753  llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7754  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7755  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
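  // vadd on polynomial types is carry-less (GF(2)) addition, i.e. bitwise XOR.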
7756  Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7757  return Builder.CreateBitCast(Ops[0], Ty);
7758  }
7759  case NEON::BI__builtin_neon_vaddhn_v: {
7760  llvm::FixedVectorType *SrcTy =
7761  llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7762 
7763  // %sum = add <4 x i32> %lhs, %rhs
7764  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7765  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7766  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7767 
7768  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7769  Constant *ShiftAmt =
7770  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7771  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7772 
7773  // %res = trunc <4 x i32> %high to <4 x i16>
7774  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7775  }
7776  case NEON::BI__builtin_neon_vcale_v:
7777  case NEON::BI__builtin_neon_vcaleq_v:
7778  case NEON::BI__builtin_neon_vcalt_v:
7779  case NEON::BI__builtin_neon_vcaltq_v:
7780  std::swap(Ops[0], Ops[1]);
7781  [[fallthrough]];
7782  case NEON::BI__builtin_neon_vcage_v:
7783  case NEON::BI__builtin_neon_vcageq_v:
7784  case NEON::BI__builtin_neon_vcagt_v:
7785  case NEON::BI__builtin_neon_vcagtq_v: {
7786  llvm::Type *Ty;
7787  switch (VTy->getScalarSizeInBits()) {
7788  default: llvm_unreachable("unexpected type");
7789  case 32:
7790  Ty = FloatTy;
7791  break;
7792  case 64:
7793  Ty = DoubleTy;
7794  break;
7795  case 16:
7796  Ty = HalfTy;
7797  break;
7798  }
7799  auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7800  llvm::Type *Tys[] = { VTy, VecFlt };
7801  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7802  return EmitNeonCall(F, Ops, NameHint);
7803  }
7804  case NEON::BI__builtin_neon_vceqz_v:
7805  case NEON::BI__builtin_neon_vceqzq_v:
7806  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7807  ICmpInst::ICMP_EQ, "vceqz");
7808  case NEON::BI__builtin_neon_vcgez_v:
7809  case NEON::BI__builtin_neon_vcgezq_v:
7810  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7811  ICmpInst::ICMP_SGE, "vcgez");
7812  case NEON::BI__builtin_neon_vclez_v:
7813  case NEON::BI__builtin_neon_vclezq_v:
7814  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7815  ICmpInst::ICMP_SLE, "vclez");
7816  case NEON::BI__builtin_neon_vcgtz_v:
7817  case NEON::BI__builtin_neon_vcgtzq_v:
7818  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7819  ICmpInst::ICMP_SGT, "vcgtz");
7820  case NEON::BI__builtin_neon_vcltz_v:
7821  case NEON::BI__builtin_neon_vcltzq_v:
7822  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7823  ICmpInst::ICMP_SLT, "vcltz");
7824  case NEON::BI__builtin_neon_vclz_v:
7825  case NEON::BI__builtin_neon_vclzq_v:
7826  // We generate a target-independent intrinsic, which needs a second argument
7827  // for whether or not clz of zero is undefined; on ARM it isn't.
7828  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7829  break;
7830  case NEON::BI__builtin_neon_vcvt_f32_v:
7831  case NEON::BI__builtin_neon_vcvtq_f32_v:
7832  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7833  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7834  HasLegalHalfType);
7835  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7836  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7837  case NEON::BI__builtin_neon_vcvt_f16_s16:
7838  case NEON::BI__builtin_neon_vcvt_f16_u16:
7839  case NEON::BI__builtin_neon_vcvtq_f16_s16:
7840  case NEON::BI__builtin_neon_vcvtq_f16_u16:
7841  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7842  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7843  HasLegalHalfType);
7844  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7845  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7846  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7847  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7848  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7849  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7850  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7851  Function *F = CGM.getIntrinsic(Int, Tys);
7852  return EmitNeonCall(F, Ops, "vcvt_n");
7853  }
7854  case NEON::BI__builtin_neon_vcvt_n_f32_v:
7855  case NEON::BI__builtin_neon_vcvt_n_f64_v:
7856  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7857  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7858  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7859  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7860  Function *F = CGM.getIntrinsic(Int, Tys);
7861  return EmitNeonCall(F, Ops, "vcvt_n");
7862  }
7863  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7864  case NEON::BI__builtin_neon_vcvt_n_s32_v:
7865  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7866  case NEON::BI__builtin_neon_vcvt_n_u32_v:
7867  case NEON::BI__builtin_neon_vcvt_n_s64_v:
7868  case NEON::BI__builtin_neon_vcvt_n_u64_v:
7869  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7870  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7871  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7872  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7873  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7874  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7875  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7876  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7877  return EmitNeonCall(F, Ops, "vcvt_n");
7878  }
7879  case NEON::BI__builtin_neon_vcvt_s32_v:
7880  case NEON::BI__builtin_neon_vcvt_u32_v:
7881  case NEON::BI__builtin_neon_vcvt_s64_v:
7882  case NEON::BI__builtin_neon_vcvt_u64_v:
7883  case NEON::BI__builtin_neon_vcvt_s16_f16:
7884  case NEON::BI__builtin_neon_vcvt_u16_f16:
7885  case NEON::BI__builtin_neon_vcvtq_s32_v:
7886  case NEON::BI__builtin_neon_vcvtq_u32_v:
7887  case NEON::BI__builtin_neon_vcvtq_s64_v:
7888  case NEON::BI__builtin_neon_vcvtq_u64_v:
7889  case NEON::BI__builtin_neon_vcvtq_s16_f16:
7890  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7891  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7892  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7893  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7894  }
7895  case NEON::BI__builtin_neon_vcvta_s16_f16:
7896  case NEON::BI__builtin_neon_vcvta_s32_v:
7897  case NEON::BI__builtin_neon_vcvta_s64_v:
7898  case NEON::BI__builtin_neon_vcvta_u16_f16:
7899  case NEON::BI__builtin_neon_vcvta_u32_v:
7900  case NEON::BI__builtin_neon_vcvta_u64_v:
7901  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7902  case NEON::BI__builtin_neon_vcvtaq_s32_v:
7903  case NEON::BI__builtin_neon_vcvtaq_s64_v:
7904  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7905  case NEON::BI__builtin_neon_vcvtaq_u32_v:
7906  case NEON::BI__builtin_neon_vcvtaq_u64_v:
7907  case NEON::BI__builtin_neon_vcvtn_s16_f16:
7908  case NEON::BI__builtin_neon_vcvtn_s32_v:
7909  case NEON::BI__builtin_neon_vcvtn_s64_v:
7910  case NEON::BI__builtin_neon_vcvtn_u16_f16:
7911  case NEON::BI__builtin_neon_vcvtn_u32_v:
7912  case NEON::BI__builtin_neon_vcvtn_u64_v:
7913  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7914  case NEON::BI__builtin_neon_vcvtnq_s32_v:
7915  case NEON::BI__builtin_neon_vcvtnq_s64_v:
7916  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7917  case NEON::BI__builtin_neon_vcvtnq_u32_v:
7918  case NEON::BI__builtin_neon_vcvtnq_u64_v:
7919  case NEON::BI__builtin_neon_vcvtp_s16_f16:
7920  case NEON::BI__builtin_neon_vcvtp_s32_v:
7921  case NEON::BI__builtin_neon_vcvtp_s64_v:
7922  case NEON::BI__builtin_neon_vcvtp_u16_f16:
7923  case NEON::BI__builtin_neon_vcvtp_u32_v:
7924  case NEON::BI__builtin_neon_vcvtp_u64_v:
7925  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7926  case NEON::BI__builtin_neon_vcvtpq_s32_v:
7927  case NEON::BI__builtin_neon_vcvtpq_s64_v:
7928  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7929  case NEON::BI__builtin_neon_vcvtpq_u32_v:
7930  case NEON::BI__builtin_neon_vcvtpq_u64_v:
7931  case NEON::BI__builtin_neon_vcvtm_s16_f16:
7932  case NEON::BI__builtin_neon_vcvtm_s32_v:
7933  case NEON::BI__builtin_neon_vcvtm_s64_v:
7934  case NEON::BI__builtin_neon_vcvtm_u16_f16:
7935  case NEON::BI__builtin_neon_vcvtm_u32_v:
7936  case NEON::BI__builtin_neon_vcvtm_u64_v:
7937  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7938  case NEON::BI__builtin_neon_vcvtmq_s32_v:
7939  case NEON::BI__builtin_neon_vcvtmq_s64_v:
7940  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7941  case NEON::BI__builtin_neon_vcvtmq_u32_v:
7942  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7943  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7944  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7945  }
7946  case NEON::BI__builtin_neon_vcvtx_f32_v: {
7947  llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7948  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7949 
7950  }
7951  case NEON::BI__builtin_neon_vext_v:
7952  case NEON::BI__builtin_neon_vextq_v: {
7953  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7954  SmallVector<int, 16> Indices;
7955  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7956  Indices.push_back(i+CV);
7957 
7958  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7959  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7960  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7961  }
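  // Illustrative: vext(a, b, 1) on <4 x i32> produces shuffle indices
  // <1, 2, 3, 4>, i.e. the last three lanes of a followed by the first lane of b.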
7962  case NEON::BI__builtin_neon_vfma_v:
7963  case NEON::BI__builtin_neon_vfmaq_v: {
7964  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7965  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7966  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7967 
7968  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 7969  return emitCallMaybeConstrainedFPBuiltin(
7970  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7971  {Ops[1], Ops[2], Ops[0]});
7972  }
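  // Illustrative: vfma(a, b, c) therefore emits fma(b, c, a), i.e. a + b*c.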
7973  case NEON::BI__builtin_neon_vld1_v:
7974  case NEON::BI__builtin_neon_vld1q_v: {
7975  llvm::Type *Tys[] = {Ty, Int8PtrTy};
7976  Ops.push_back(getAlignmentValue32(PtrOp0));
7977  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7978  }
7979  case NEON::BI__builtin_neon_vld1_x2_v:
7980  case NEON::BI__builtin_neon_vld1q_x2_v:
7981  case NEON::BI__builtin_neon_vld1_x3_v:
7982  case NEON::BI__builtin_neon_vld1q_x3_v:
7983  case NEON::BI__builtin_neon_vld1_x4_v:
7984  case NEON::BI__builtin_neon_vld1q_x4_v: {
7985  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7986  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7987  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7988  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7989  }
7990  case NEON::BI__builtin_neon_vld2_v:
7991  case NEON::BI__builtin_neon_vld2q_v:
7992  case NEON::BI__builtin_neon_vld3_v:
7993  case NEON::BI__builtin_neon_vld3q_v:
7994  case NEON::BI__builtin_neon_vld4_v:
7995  case NEON::BI__builtin_neon_vld4q_v:
7996  case NEON::BI__builtin_neon_vld2_dup_v:
7997  case NEON::BI__builtin_neon_vld2q_dup_v:
7998  case NEON::BI__builtin_neon_vld3_dup_v:
7999  case NEON::BI__builtin_neon_vld3q_dup_v:
8000  case NEON::BI__builtin_neon_vld4_dup_v:
8001  case NEON::BI__builtin_neon_vld4q_dup_v: {
8002  llvm::Type *Tys[] = {Ty, Int8PtrTy};
8003  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8004  Value *Align = getAlignmentValue32(PtrOp1);
8005  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8006  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8007  }
8008  case NEON::BI__builtin_neon_vld1_dup_v:
8009  case NEON::BI__builtin_neon_vld1q_dup_v: {
8010  Value *V = PoisonValue::get(Ty);
8011  PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8012  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8013  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8014  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8015  return EmitNeonSplat(Ops[0], CI);
8016  }
8017  case NEON::BI__builtin_neon_vld2_lane_v:
8018  case NEON::BI__builtin_neon_vld2q_lane_v:
8019  case NEON::BI__builtin_neon_vld3_lane_v:
8020  case NEON::BI__builtin_neon_vld3q_lane_v:
8021  case NEON::BI__builtin_neon_vld4_lane_v:
8022  case NEON::BI__builtin_neon_vld4q_lane_v: {
8023  llvm::Type *Tys[] = {Ty, Int8PtrTy};
8024  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8025  for (unsigned I = 2; I < Ops.size() - 1; ++I)
8026  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8027  Ops.push_back(getAlignmentValue32(PtrOp1));
8028  Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8029  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8030  }
8031  case NEON::BI__builtin_neon_vmovl_v: {
8032  llvm::FixedVectorType *DTy =
8033  llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8034  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8035  if (Usgn)
8036  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8037  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8038  }
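  // Illustrative: vmovl_s16 widens <4 x i16> to <4 x i32> with sext;
  // the unsigned variants use zext instead.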
8039  case NEON::BI__builtin_neon_vmovn_v: {
8040  llvm::FixedVectorType *QTy =
8041  llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8042  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8043  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8044  }
8045  case NEON::BI__builtin_neon_vmull_v:
8046  // FIXME: the integer vmull operations could be emitted in terms of pure
8047  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8048  // hoisting the exts outside loops. Until global ISel comes along that can
8049  // see through such movement, this leads to bad CodeGen. So we need an
8050  // intrinsic for now.
8051  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8052  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8053  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8054  case NEON::BI__builtin_neon_vpadal_v:
8055  case NEON::BI__builtin_neon_vpadalq_v: {
8056  // The source operand type has twice as many elements of half the size.
8057  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8058  llvm::Type *EltTy =
8059  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8060  auto *NarrowTy =
8061  llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8062  llvm::Type *Tys[2] = { Ty, NarrowTy };
8063  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8064  }
8065  case NEON::BI__builtin_neon_vpaddl_v:
8066  case NEON::BI__builtin_neon_vpaddlq_v: {
8067  // The source operand type has twice as many elements of half the size.
8068  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8069  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8070  auto *NarrowTy =
8071  llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8072  llvm::Type *Tys[2] = { Ty, NarrowTy };
8073  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8074  }
8075  case NEON::BI__builtin_neon_vqdmlal_v:
8076  case NEON::BI__builtin_neon_vqdmlsl_v: {
8077  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8078  Ops[1] =
8079  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8080  Ops.resize(2);
8081  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8082  }
8083  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8084  case NEON::BI__builtin_neon_vqdmulh_lane_v:
8085  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8086  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8087  auto *RTy = cast<llvm::FixedVectorType>(Ty);
8088  if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8089  BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8090  RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8091  RTy->getNumElements() * 2);
8092  llvm::Type *Tys[2] = {
8093  RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8094  /*isQuad*/ false))};
8095  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8096  }
8097  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8098  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8099  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8100  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8101  llvm::Type *Tys[2] = {
8102  Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8103  /*isQuad*/ true))};
8104  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8105  }
8106  case NEON::BI__builtin_neon_vqshl_n_v:
8107  case NEON::BI__builtin_neon_vqshlq_n_v:
8108  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8109  1, false);
8110  case NEON::BI__builtin_neon_vqshlu_n_v:
8111  case NEON::BI__builtin_neon_vqshluq_n_v:
8112  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8113  1, false);
8114  case NEON::BI__builtin_neon_vrecpe_v:
8115  case NEON::BI__builtin_neon_vrecpeq_v:
8116  case NEON::BI__builtin_neon_vrsqrte_v:
8117  case NEON::BI__builtin_neon_vrsqrteq_v:
8118  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8119  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8120  case NEON::BI__builtin_neon_vrndi_v:
8121  case NEON::BI__builtin_neon_vrndiq_v:
8122  Int = Builder.getIsFPConstrained()
8123  ? Intrinsic::experimental_constrained_nearbyint
 8124  : Intrinsic::nearbyint;
8125  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8126  case NEON::BI__builtin_neon_vrshr_n_v:
8127  case NEON::BI__builtin_neon_vrshrq_n_v:
8128  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8129  1, true);
8130  case NEON::BI__builtin_neon_vsha512hq_u64:
8131  case NEON::BI__builtin_neon_vsha512h2q_u64:
8132  case NEON::BI__builtin_neon_vsha512su0q_u64:
8133  case NEON::BI__builtin_neon_vsha512su1q_u64: {
8134  Function *F = CGM.getIntrinsic(Int);
8135  return EmitNeonCall(F, Ops, "");
8136  }
8137  case NEON::BI__builtin_neon_vshl_n_v:
8138  case NEON::BI__builtin_neon_vshlq_n_v:
8139  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8140  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8141  "vshl_n");
8142  case NEON::BI__builtin_neon_vshll_n_v: {
8143  llvm::FixedVectorType *SrcTy =
8144  llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8145  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8146  if (Usgn)
8147  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8148  else
8149  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8150  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8151  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8152  }
8153  case NEON::BI__builtin_neon_vshrn_n_v: {
8154  llvm::FixedVectorType *SrcTy =
8155  llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8156  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8157  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8158  if (Usgn)
8159  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8160  else
8161  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8162  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8163  }
8164  case NEON::BI__builtin_neon_vshr_n_v:
8165  case NEON::BI__builtin_neon_vshrq_n_v:
8166  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8167  case NEON::BI__builtin_neon_vst1_v:
8168  case NEON::BI__builtin_neon_vst1q_v:
8169  case NEON::BI__builtin_neon_vst2_v:
8170  case NEON::BI__builtin_neon_vst2q_v:
8171  case NEON::BI__builtin_neon_vst3_v:
8172  case NEON::BI__builtin_neon_vst3q_v:
8173  case NEON::BI__builtin_neon_vst4_v:
8174  case NEON::BI__builtin_neon_vst4q_v:
8175  case NEON::BI__builtin_neon_vst2_lane_v:
8176  case NEON::BI__builtin_neon_vst2q_lane_v:
8177  case NEON::BI__builtin_neon_vst3_lane_v:
8178  case NEON::BI__builtin_neon_vst3q_lane_v:
8179  case NEON::BI__builtin_neon_vst4_lane_v:
8180  case NEON::BI__builtin_neon_vst4q_lane_v: {
8181  llvm::Type *Tys[] = {Int8PtrTy, Ty};
8182  Ops.push_back(getAlignmentValue32(PtrOp0));
8183  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8184  }
8185  case NEON::BI__builtin_neon_vsm3partw1q_u32:
8186  case NEON::BI__builtin_neon_vsm3partw2q_u32:
8187  case NEON::BI__builtin_neon_vsm3ss1q_u32:
8188  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8189  case NEON::BI__builtin_neon_vsm4eq_u32: {
8190  Function *F = CGM.getIntrinsic(Int);
8191  return EmitNeonCall(F, Ops, "");
8192  }
8193  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8194  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8195  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8196  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8197  Function *F = CGM.getIntrinsic(Int);
8198  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8199  return EmitNeonCall(F, Ops, "");
8200  }
8201  case NEON::BI__builtin_neon_vst1_x2_v:
8202  case NEON::BI__builtin_neon_vst1q_x2_v:
8203  case NEON::BI__builtin_neon_vst1_x3_v:
8204  case NEON::BI__builtin_neon_vst1q_x3_v:
8205  case NEON::BI__builtin_neon_vst1_x4_v:
8206  case NEON::BI__builtin_neon_vst1q_x4_v: {
8207  // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8208  // in AArch64 it comes last. We may want to stick to one or the other.
8209  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8210  Arch == llvm::Triple::aarch64_32) {
8211  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8212  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8213  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8214  }
8215  llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8216  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8217  }
8218  case NEON::BI__builtin_neon_vsubhn_v: {
8219  llvm::FixedVectorType *SrcTy =
8220  llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8221 
8222  // %diff = sub <4 x i32> %lhs, %rhs
8223  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8224  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8225  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8226 
8227  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8228  Constant *ShiftAmt =
8229  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8230  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8231 
8232  // %res = trunc <4 x i32> %high to <4 x i16>
8233  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8234  }
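  // Net effect, illustrative for vsubhn_s32: each result lane is the high
  // 16 bits of the corresponding 32-bit difference.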
8235  case NEON::BI__builtin_neon_vtrn_v:
8236  case NEON::BI__builtin_neon_vtrnq_v: {
8237  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8238  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8239  Value *SV = nullptr;
8240 
8241  for (unsigned vi = 0; vi != 2; ++vi) {
8242  SmallVector<int, 16> Indices;
8243  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8244  Indices.push_back(i+vi);
8245  Indices.push_back(i+e+vi);
8246  }
8247  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8248  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8249  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8250  }
8251  return SV;
8252  }
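  // Illustrative, for <4 x i32>: the two passes store shuffles with indices
  // <0, 4, 2, 6> and <1, 5, 3, 7>, the two halves of a 2x2 lane transpose.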
8253  case NEON::BI__builtin_neon_vtst_v:
8254  case NEON::BI__builtin_neon_vtstq_v: {
8255  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8256  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8257  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8258  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8259  ConstantAggregateZero::get(Ty));
8260  return Builder.CreateSExt(Ops[0], Ty, "vtst");
8261  }
8262  case NEON::BI__builtin_neon_vuzp_v:
8263  case NEON::BI__builtin_neon_vuzpq_v: {
8264  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8265  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8266  Value *SV = nullptr;
8267 
8268  for (unsigned vi = 0; vi != 2; ++vi) {
8269  SmallVector<int, 16> Indices;
8270  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8271  Indices.push_back(2*i+vi);
8272 
8273  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8274  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8275  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8276  }
8277  return SV;
8278  }
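  // Illustrative, for <4 x i32>: the vi == 0 pass gathers even lanes
  // <0, 2, 4, 6> and the vi == 1 pass gathers odd lanes <1, 3, 5, 7>.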
8279  case NEON::BI__builtin_neon_vxarq_u64: {
8280  Function *F = CGM.getIntrinsic(Int);
8281  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8282  return EmitNeonCall(F, Ops, "");
8283  }
8284  case NEON::BI__builtin_neon_vzip_v:
8285  case NEON::BI__builtin_neon_vzipq_v: {
8286  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8287  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8288  Value *SV = nullptr;
8289 
8290  for (unsigned vi = 0; vi != 2; ++vi) {
8291  SmallVector<int, 16> Indices;
8292  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8293  Indices.push_back((i + vi*e) >> 1);
8294  Indices.push_back(((i + vi*e) >> 1)+e);
8295  }
8296  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8297  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8298  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8299  }
8300  return SV;
8301  }
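  // Illustrative, for <4 x i32>: the vi == 0 pass interleaves the low halves
  // <0, 4, 1, 5> and the vi == 1 pass the high halves <2, 6, 3, 7>.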
8302  case NEON::BI__builtin_neon_vdot_s32:
8303  case NEON::BI__builtin_neon_vdot_u32:
8304  case NEON::BI__builtin_neon_vdotq_s32:
8305  case NEON::BI__builtin_neon_vdotq_u32: {
8306  auto *InputTy =
8307  llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8308  llvm::Type *Tys[2] = { Ty, InputTy };
8309  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8310  }
8311  case NEON::BI__builtin_neon_vfmlal_low_f16:
8312  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8313  auto *InputTy =
8314  llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8315  llvm::Type *Tys[2] = { Ty, InputTy };
8316  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8317  }
8318  case NEON::BI__builtin_neon_vfmlsl_low_f16:
8319  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8320  auto *InputTy =
8321  llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8322  llvm::Type *Tys[2] = { Ty, InputTy };
8323  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8324  }
8325  case NEON::BI__builtin_neon_vfmlal_high_f16:
8326  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8327  auto *InputTy =
8328  llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8329  llvm::Type *Tys[2] = { Ty, InputTy };
8330  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8331  }
8332  case NEON::BI__builtin_neon_vfmlsl_high_f16:
8333  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8334  auto *InputTy =
8335  llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8336  llvm::Type *Tys[2] = { Ty, InputTy };
8337  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8338  }
8339  case NEON::BI__builtin_neon_vmmlaq_s32:
8340  case NEON::BI__builtin_neon_vmmlaq_u32: {
8341  auto *InputTy =
8342  llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8343  llvm::Type *Tys[2] = { Ty, InputTy };
8344  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8345  }
8346  case NEON::BI__builtin_neon_vusmmlaq_s32: {
8347  auto *InputTy =
8348  llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8349  llvm::Type *Tys[2] = { Ty, InputTy };
8350  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8351  }
8352  case NEON::BI__builtin_neon_vusdot_s32:
8353  case NEON::BI__builtin_neon_vusdotq_s32: {
8354  auto *InputTy =
8355  llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8356  llvm::Type *Tys[2] = { Ty, InputTy };
8357  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8358  }
8359  case NEON::BI__builtin_neon_vbfdot_f32:
8360  case NEON::BI__builtin_neon_vbfdotq_f32: {
8361  llvm::Type *InputTy =
8362  llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8363  llvm::Type *Tys[2] = { Ty, InputTy };
8364  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8365  }
8366  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8367  llvm::Type *Tys[1] = { Ty };
8368  Function *F = CGM.getIntrinsic(Int, Tys);
8369  return EmitNeonCall(F, Ops, "vcvtfp2bf");
8370  }
8371 
8372  }
8373 
8374  assert(Int && "Expected valid intrinsic number");
8375 
8376  // Determine the type(s) of this overloaded AArch64 intrinsic.
8377  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8378 
8379  Value *Result = EmitNeonCall(F, Ops, NameHint);
8380  llvm::Type *ResultType = ConvertType(E->getType());
8381  // Cast the one-element vector result of an AArch64 intrinsic
8382  // back to the scalar type expected by the builtin.
8383  return Builder.CreateBitCast(Result, ResultType, NameHint);
8384 }
8385 
8386 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8387  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8388  const CmpInst::Predicate Ip, const Twine &Name) {
8389  llvm::Type *OTy = Op->getType();
8390 
8391  // FIXME: this is utterly horrific. We should not be looking at previous
8392  // codegen context to find out what needs doing. Unfortunately TableGen
8393  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8394  // (etc).
8395  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8396  OTy = BI->getOperand(0)->getType();
8397 
8398  Op = Builder.CreateBitCast(Op, OTy);
8399  if (OTy->getScalarType()->isFloatingPointTy()) {
8400  if (Fp == CmpInst::FCMP_OEQ)
8401  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8402  else
8403  Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8404  } else {
8405  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8406  }
8407  return Builder.CreateSExt(Op, Ty, Name);
8408 }
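// Illustrative: vcgtz_f32 emits a signaling fcmp ogt against zero and
// sign-extends the i1 lanes to an all-ones/all-zeros mask.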
8409 
8410 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8411  Value *ExtOp, Value *IndexOp,
8412  llvm::Type *ResTy, unsigned IntID,
8413  const char *Name) {
8414  SmallVector<Value *, 2> TblOps;
8415  if (ExtOp)
8416  TblOps.push_back(ExtOp);
8417 
8418  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8419  SmallVector<int, 16> Indices;
8420  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8421  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8422  Indices.push_back(2*i);
8423  Indices.push_back(2*i+1);
8424  }
8425 
8426  int PairPos = 0, End = Ops.size() - 1;
8427  while (PairPos < End) {
8428  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8429  Ops[PairPos+1], Indices,
8430  Name));
8431  PairPos += 2;
8432  }
8433 
8434  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8435  // of the last 128-bit lookup table with zero.
8436  if (PairPos == End) {
8437  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8438  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8439  ZeroTbl, Indices, Name));
8440  }
8441 
8442  Function *TblF;
8443  TblOps.push_back(IndexOp);
8444  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8445 
8446  return CGF.EmitNeonCall(TblF, TblOps, Name);
8447 }
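// Illustrative: the interleaving indices (0, 1, ..., 2e-1) concatenate each
// pair of 64-bit table registers into one 128-bit table; a trailing odd
// table is concatenated with zeroes.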
8448 
8449 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8450  unsigned Value;
8451  switch (BuiltinID) {
8452  default:
8453  return nullptr;
8454  case clang::ARM::BI__builtin_arm_nop:
8455  Value = 0;
8456  break;
8457  case clang::ARM::BI__builtin_arm_yield:
8458  case clang::ARM::BI__yield:
8459  Value = 1;
8460  break;
8461  case clang::ARM::BI__builtin_arm_wfe:
8462  case clang::ARM::BI__wfe:
8463  Value = 2;
8464  break;
8465  case clang::ARM::BI__builtin_arm_wfi:
8466  case clang::ARM::BI__wfi:
8467  Value = 3;
8468  break;
8469  case clang::ARM::BI__builtin_arm_sev:
8470  case clang::ARM::BI__sev:
8471  Value = 4;
8472  break;
8473  case clang::ARM::BI__builtin_arm_sevl:
8474  case clang::ARM::BI__sevl:
8475  Value = 5;
8476  break;
8477  }
8478 
8479  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8480  llvm::ConstantInt::get(Int32Ty, Value));
8481 }
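// Illustrative: __wfi() lowers to call void @llvm.arm.hint(i32 3).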
8482 
8483 enum SpecialRegisterAccessKind {
8484  NormalRead,
8485  VolatileRead,
8486  Write,
8487 };
8488 
8489 // Generates the IR for __builtin_read_exec_*.
8490 // Lowers the builtin to the amdgcn_ballot intrinsic.
8491 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8492  llvm::Type *RegisterType,
8493  llvm::Type *ValueType, bool isExecHi) {
8494  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8495  CodeGen::CodeGenModule &CGM = CGF.CGM;
8496 
8497  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8498  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8499 
8500  if (isExecHi) {
8501  Value *Rt2 = Builder.CreateLShr(Call, 32);
8502  Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8503  return Rt2;
8504  }
8505 
8506  return Call;
8507 }
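// Illustrative: __builtin_amdgcn_read_exec_hi() emits a wave-wide
// @llvm.amdgcn.ballot.i64(i1 true) and returns the upper 32 bits of the result.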
8508 
8509 // Generates the IR for the read/write special register builtin.
8510 // ValueType is the type of the value that is to be written or read;
8511 // RegisterType is the type of the register being written to or read from.
8512 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8513  const CallExpr *E,
8514  llvm::Type *RegisterType,
8515  llvm::Type *ValueType,
 8516  SpecialRegisterAccessKind AccessKind,
8517  StringRef SysReg = "") {
8518  // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
8519  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8520  RegisterType->isIntegerTy(128)) &&
8521  "Unsupported size for register.");
8522 
8523  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8524  CodeGen::CodeGenModule &CGM = CGF.CGM;
8525  LLVMContext &Context = CGM.getLLVMContext();
8526 
8527  if (SysReg.empty()) {
8528  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8529  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8530  }
8531 
8532  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8533  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8534  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8535 
8536  llvm::Type *Types[] = { RegisterType };
8537 
8538  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8539  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8540  && "Can't fit 64-bit value in 32-bit register");
8541 
8542  if (AccessKind != Write) {
8543  assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8544  llvm::Function *F = CGM.getIntrinsic(
8545  AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8546  : llvm::Intrinsic::read_register,
8547  Types);
8548  llvm::Value *Call = Builder.CreateCall(F, Metadata);
8549 
8550  if (MixedTypes)
8551  // Read into 64 bit register and then truncate result to 32 bit.
8552  return Builder.CreateTrunc(Call, ValueType);
8553 
8554  if (ValueType->isPointerTy())
8555  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8556  return Builder.CreateIntToPtr(Call, ValueType);
8557 
8558  return Call;
8559  }
8560 
8561  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8562  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8563  if (MixedTypes) {
8564  // Extend 32 bit write value to 64 bit to pass to write.
8565  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8566  return Builder.CreateCall(F, { Metadata, ArgValue });
8567  }
8568 
8569  if (ValueType->isPointerTy()) {
8570  // Have VoidPtrTy ArgValue but want to return an i32/i64.
8571  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8572  return Builder.CreateCall(F, { Metadata, ArgValue });
8573  }
8574 
8575  return Builder.CreateCall(F, { Metadata, ArgValue });
8576 }
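// Illustrative: a 64-bit volatile read lowers to
//   call i64 @llvm.read_volatile_register.i64(metadata !{!"<sysreg>"}),
// with the register name carried as metadata rather than as an operand.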
8577 
8578 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8579 /// argument that specifies the vector type.
8580 static bool HasExtraNeonArgument(unsigned BuiltinID) {
8581  switch (BuiltinID) {
8582  default: break;
8583  case NEON::BI__builtin_neon_vget_lane_i8:
8584  case NEON::BI__builtin_neon_vget_lane_i16:
8585  case NEON::BI__builtin_neon_vget_lane_bf16:
8586  case NEON::BI__builtin_neon_vget_lane_i32:
8587  case NEON::BI__builtin_neon_vget_lane_i64:
8588  case NEON::BI__builtin_neon_vget_lane_f32:
8589  case NEON::BI__builtin_neon_vgetq_lane_i8:
8590  case NEON::BI__builtin_neon_vgetq_lane_i16:
8591  case NEON::BI__builtin_neon_vgetq_lane_bf16:
8592  case NEON::BI__builtin_neon_vgetq_lane_i32:
8593  case NEON::BI__builtin_neon_vgetq_lane_i64:
8594  case NEON::BI__builtin_neon_vgetq_lane_f32:
8595  case NEON::BI__builtin_neon_vduph_lane_bf16:
8596  case NEON::BI__builtin_neon_vduph_laneq_bf16:
8597  case NEON::BI__builtin_neon_vset_lane_i8:
8598  case NEON::BI__builtin_neon_vset_lane_i16:
8599  case NEON::BI__builtin_neon_vset_lane_bf16:
8600  case NEON::BI__builtin_neon_vset_lane_i32:
8601  case NEON::BI__builtin_neon_vset_lane_i64:
8602  case NEON::BI__builtin_neon_vset_lane_f32:
8603  case NEON::BI__builtin_neon_vsetq_lane_i8:
8604  case NEON::BI__builtin_neon_vsetq_lane_i16:
8605  case NEON::BI__builtin_neon_vsetq_lane_bf16:
8606  case NEON::BI__builtin_neon_vsetq_lane_i32:
8607  case NEON::BI__builtin_neon_vsetq_lane_i64:
8608  case NEON::BI__builtin_neon_vsetq_lane_f32:
8609  case NEON::BI__builtin_neon_vsha1h_u32:
8610  case NEON::BI__builtin_neon_vsha1cq_u32:
8611  case NEON::BI__builtin_neon_vsha1pq_u32:
8612  case NEON::BI__builtin_neon_vsha1mq_u32:
8613  case NEON::BI__builtin_neon_vcvth_bf16_f32:
8614  case clang::ARM::BI_MoveToCoprocessor:
8615  case clang::ARM::BI_MoveToCoprocessor2:
8616  return false;
8617  }
8618  return true;
8619 }
8620 
8621 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8622  const CallExpr *E,
8623  ReturnValueSlot ReturnValue,
8624  llvm::Triple::ArchType Arch) {
8625  if (auto Hint = GetValueForARMHint(BuiltinID))
8626  return Hint;
8627 
8628  if (BuiltinID == clang::ARM::BI__emit) {
8629  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8630  llvm::FunctionType *FTy =
8631  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8632 
8633  Expr::EvalResult Result;
8634  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8635  llvm_unreachable("Sema will ensure that the parameter is constant");
8636 
8637  llvm::APSInt Value = Result.Val.getInt();
8638  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8639 
8640  llvm::InlineAsm *Emit =
8641  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8642  /*hasSideEffects=*/true)
8643  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8644  /*hasSideEffects=*/true);
8645 
8646  return Builder.CreateCall(Emit);
8647  }
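  // Illustrative: __emit(0xbf00) in Thumb mode emits the inline asm
  // ".inst.n 0xBF00" (the Thumb NOP encoding).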
8648 
8649  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8650  Value *Option = EmitScalarExpr(E->getArg(0));
8651  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8652  }
8653 
8654  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8655  Value *Address = EmitScalarExpr(E->getArg(0));
8656  Value *RW = EmitScalarExpr(E->getArg(1));
8657  Value *IsData = EmitScalarExpr(E->getArg(2));
8658 
8659  // Locality is not supported on the ARM target.
8660  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8661 
8662  Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8663  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8664  }
8665 
8666  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8667  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8668  return Builder.CreateCall(
8669  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8670  }
8671 
8672  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8673  BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8674  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8675  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8676  Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8677  if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8678  Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8679  return Res;
8680  }
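  // Illustrative: __builtin_arm_clz(x) becomes
  //   call i32 @llvm.ctlz.i32(i32 %x, i1 false).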
8681 
8682 
8683  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8684  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8685  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8686  }
8687  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8688  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8689  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8690  "cls");
8691  }
8692 
8693  if (BuiltinID == clang::ARM::BI__clear_cache) {
8694  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8695  const FunctionDecl *FD = E->getDirectCallee();
8696  Value *Ops[2];
8697  for (unsigned i = 0; i < 2; i++)
8698  Ops[i] = EmitScalarExpr(E->getArg(i));
8699  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8700  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8701  StringRef Name = FD->getName();
8702  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8703  }
8704 
8705  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8706  BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8707  Function *F;
8708 
8709  switch (BuiltinID) {
8710  default: llvm_unreachable("unexpected builtin");
8711  case clang::ARM::BI__builtin_arm_mcrr:
8712  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8713  break;
8714  case clang::ARM::BI__builtin_arm_mcrr2:
8715  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8716  break;
8717  }
8718 
8719  // The MCRR{2} instruction has 5 operands, but
8720  // the intrinsic has 4 because Rt and Rt2
8721  // are represented as a single unsigned 64-bit
8722  // integer in the intrinsic definition, while
8723  // internally they are represented as two
8724  // 32-bit integers.
8725 
8726  Value *Coproc = EmitScalarExpr(E->getArg(0));
8727  Value *Opc1 = EmitScalarExpr(E->getArg(1));
8728  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8729  Value *CRm = EmitScalarExpr(E->getArg(3));
8730 
8731  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8732  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8733  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8734  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8735 
8736  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8737  }
8738 
8739  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8740  BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8741  Function *F;
8742 
8743  switch (BuiltinID) {
8744  default: llvm_unreachable("unexpected builtin");
8745  case clang::ARM::BI__builtin_arm_mrrc:
8746  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8747  break;
8748  case clang::ARM::BI__builtin_arm_mrrc2:
8749  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8750  break;
8751  }
8752 
8753  Value *Coproc = EmitScalarExpr(E->getArg(0));
8754  Value *Opc1 = EmitScalarExpr(E->getArg(1));
8755  Value *CRm = EmitScalarExpr(E->getArg(2));
8756  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8757 
8758  // Returns an unsigned 64-bit integer, represented
8759  // as two 32-bit integers.
8760 
8761  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8762  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8763  Rt = Builder.CreateZExt(Rt, Int64Ty);
8764  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8765 
8766  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8767  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8768  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8769 
8770  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8771  }
8772 
8773  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8774  ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8775  BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8776  getContext().getTypeSize(E->getType()) == 64) ||
8777  BuiltinID == clang::ARM::BI__ldrexd) {
8778  Function *F;
8779 
8780  switch (BuiltinID) {
8781  default: llvm_unreachable("unexpected builtin");
8782  case clang::ARM::BI__builtin_arm_ldaex:
8783  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8784  break;
8785  case clang::ARM::BI__builtin_arm_ldrexd:
8786  case clang::ARM::BI__builtin_arm_ldrex:
8787  case clang::ARM::BI__ldrexd:
8788  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8789  break;
8790  }
8791 
8792  Value *LdPtr = EmitScalarExpr(E->getArg(0));
8793  Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8794 
8795  Value *Val0 = Builder.CreateExtractValue(Val, 1);
8796  Value *Val1 = Builder.CreateExtractValue(Val, 0);
8797  Val0 = Builder.CreateZExt(Val0, Int64Ty);
8798  Val1 = Builder.CreateZExt(Val1, Int64Ty);
8799 
8800  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8801  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8802  Val = Builder.CreateOr(Val, Val1);
8803  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8804  }
8805 
8806  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8807  BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8808  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8809 
8810  QualType Ty = E->getType();
8811  llvm::Type *RealResTy = ConvertType(Ty);
8812  llvm::Type *IntTy =
8813  llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8814 
8815  Function *F = CGM.getIntrinsic(
8816  BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8817  : Intrinsic::arm_ldrex,
8818  UnqualPtrTy);
8819  CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8820  Val->addParamAttr(
8821  0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8822 
8823  if (RealResTy->isPointerTy())
8824  return Builder.CreateIntToPtr(Val, RealResTy);
8825  else {
8826  llvm::Type *IntResTy = llvm::IntegerType::get(
8827  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8828  return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8829  RealResTy);
8830  }
8831  }
8832 
8833  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8834  ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8835  BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8836  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8837  Function *F = CGM.getIntrinsic(
8838  BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8839  : Intrinsic::arm_strexd);
8840  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8841 
8842  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8843  Value *Val = EmitScalarExpr(E->getArg(0));
8844  Builder.CreateStore(Val, Tmp);
8845 
8846  Address LdPtr = Tmp.withElementType(STy);
8847  Val = Builder.CreateLoad(LdPtr);
8848 
8849  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8850  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8851  Value *StPtr = EmitScalarExpr(E->getArg(1));
8852  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8853  }
8854 
8855  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8856  BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8857  Value *StoreVal = EmitScalarExpr(E->getArg(0));
8858  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8859 
8860  QualType Ty = E->getArg(0)->getType();
8861 
8862  llvm::Type *StoreTy =
8863  llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8864 
8865  if (StoreVal->getType()->isPointerTy())
8866  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8867  else {
8868  llvm::Type *IntTy = llvm::IntegerType::get(
8869  getLLVMContext(),
8870  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8871  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8872  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8873  }
8874 
8875  Function *F = CGM.getIntrinsic(
8876  BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8877  : Intrinsic::arm_strex,
8878  StoreAddr->getType());
8879 
8880  CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8881  CI->addParamAttr(
8882  1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8883  return CI;
8884  }
8885 
8886  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8887  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8888  return Builder.CreateCall(F);
8889  }
8890 
8891  // CRC32
8892  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8893  switch (BuiltinID) {
8894  case clang::ARM::BI__builtin_arm_crc32b:
8895  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8896  case clang::ARM::BI__builtin_arm_crc32cb:
8897  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8898  case clang::ARM::BI__builtin_arm_crc32h:
8899  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8900  case clang::ARM::BI__builtin_arm_crc32ch:
8901  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8902  case clang::ARM::BI__builtin_arm_crc32w:
8903  case clang::ARM::BI__builtin_arm_crc32d:
8904  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8905  case clang::ARM::BI__builtin_arm_crc32cw:
8906  case clang::ARM::BI__builtin_arm_crc32cd:
8907  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8908  }
8909 
8910  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8911  Value *Arg0 = EmitScalarExpr(E->getArg(0));
8912  Value *Arg1 = EmitScalarExpr(E->getArg(1));
8913 
8914  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8915  // intrinsics, hence we need different codegen for these cases.
8916  if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8917  BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8918  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8919  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8920  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8921  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8922 
8923  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8924  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8925  return Builder.CreateCall(F, {Res, Arg1b});
8926  } else {
8927  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8928 
8929  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8930  return Builder.CreateCall(F, {Arg0, Arg1});
8931  }
8932  }
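  // Illustrative: __builtin_arm_crc32d(a, b) therefore becomes
  //   crc32w(crc32w(a, trunc(b)), trunc(b >> 32)).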
8933 
8934  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8935  BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8936  BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8937  BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8938  BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8939  BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8940 
 8941  SpecialRegisterAccessKind AccessKind = Write;
8942  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8943  BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8944  BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
 8945  AccessKind = VolatileRead;
8946 
8947  bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8948  BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8949 
8950  bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8951  BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8952 
8953  llvm::Type *ValueType;
8954  llvm::Type *RegisterType;
8955  if (IsPointerBuiltin) {
8956  ValueType = VoidPtrTy;
8957  RegisterType = Int32Ty;
8958  } else if (Is64Bit) {
8959  ValueType = RegisterType = Int64Ty;
8960  } else {
8961  ValueType = RegisterType = Int32Ty;
8962  }
8963 
8964  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8965  AccessKind);
8966  }
8967 
8968  if (BuiltinID == ARM::BI__builtin_sponentry) {
8969  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8970  return Builder.CreateCall(F);
8971  }
8972 
8973  // Handle MSVC intrinsics before argument evaluation to prevent double
8974  // evaluation.
8975  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8976  return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8977 
8978  // Deal with MVE builtins
8979  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8980  return Result;
8981  // Handle CDE builtins
8982  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8983  return Result;
8984 
8985  // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
8986  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8987  return P.first == BuiltinID;
8988  });
8989  if (It != end(NEONEquivalentIntrinsicMap))
8990  BuiltinID = It->second;
8991 
8992  // Find out if any arguments are required to be integer constant
8993  // expressions.
8994  unsigned ICEArguments = 0;
 8995  ASTContext::GetBuiltinTypeError Error;
8996  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8997  assert(Error == ASTContext::GE_None && "Should not codegen an error");
8998 
8999  auto getAlignmentValue32 = [&](Address addr) -> Value* {
9000  return Builder.getInt32(addr.getAlignment().getQuantity());
9001  };
9002 
9003  Address PtrOp0 = Address::invalid();
9004  Address PtrOp1 = Address::invalid();
 9005  SmallVector<Value*, 4> Ops;
9006  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9007  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9008  for (unsigned i = 0, e = NumArgs; i != e; i++) {
9009  if (i == 0) {
9010  switch (BuiltinID) {
9011  case NEON::BI__builtin_neon_vld1_v:
9012  case NEON::BI__builtin_neon_vld1q_v:
9013  case NEON::BI__builtin_neon_vld1q_lane_v:
9014  case NEON::BI__builtin_neon_vld1_lane_v:
9015  case NEON::BI__builtin_neon_vld1_dup_v:
9016  case NEON::BI__builtin_neon_vld1q_dup_v:
9017  case NEON::BI__builtin_neon_vst1_v:
9018  case NEON::BI__builtin_neon_vst1q_v:
9019  case NEON::BI__builtin_neon_vst1q_lane_v:
9020  case NEON::BI__builtin_neon_vst1_lane_v:
9021  case NEON::BI__builtin_neon_vst2_v:
9022  case NEON::BI__builtin_neon_vst2q_v:
9023  case NEON::BI__builtin_neon_vst2_lane_v:
9024  case NEON::BI__builtin_neon_vst2q_lane_v:
9025  case NEON::BI__builtin_neon_vst3_v:
9026  case NEON::BI__builtin_neon_vst3q_v:
9027  case NEON::BI__builtin_neon_vst3_lane_v:
9028  case NEON::BI__builtin_neon_vst3q_lane_v:
9029  case NEON::BI__builtin_neon_vst4_v:
9030  case NEON::BI__builtin_neon_vst4q_v:
9031  case NEON::BI__builtin_neon_vst4_lane_v:
9032  case NEON::BI__builtin_neon_vst4q_lane_v:
9033  // Get the alignment for the argument in addition to the value;
9034  // we'll use it later.
9035  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9036  Ops.push_back(PtrOp0.emitRawPointer(*this));
9037  continue;
9038  }
9039  }
9040  if (i == 1) {
9041  switch (BuiltinID) {
9042  case NEON::BI__builtin_neon_vld2_v:
9043  case NEON::BI__builtin_neon_vld2q_v:
9044  case NEON::BI__builtin_neon_vld3_v:
9045  case NEON::BI__builtin_neon_vld3q_v:
9046  case NEON::BI__builtin_neon_vld4_v:
9047  case NEON::BI__builtin_neon_vld4q_v:
9048  case NEON::BI__builtin_neon_vld2_lane_v:
9049  case NEON::BI__builtin_neon_vld2q_lane_v:
9050  case NEON::BI__builtin_neon_vld3_lane_v:
9051  case NEON::BI__builtin_neon_vld3q_lane_v:
9052  case NEON::BI__builtin_neon_vld4_lane_v:
9053  case NEON::BI__builtin_neon_vld4q_lane_v:
9054  case NEON::BI__builtin_neon_vld2_dup_v:
9055  case NEON::BI__builtin_neon_vld2q_dup_v:
9056  case NEON::BI__builtin_neon_vld3_dup_v:
9057  case NEON::BI__builtin_neon_vld3q_dup_v:
9058  case NEON::BI__builtin_neon_vld4_dup_v:
9059  case NEON::BI__builtin_neon_vld4q_dup_v:
9060  // Get the alignment for the argument in addition to the value;
9061  // we'll use it later.
9062  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9063  Ops.push_back(PtrOp1.emitRawPointer(*this));
9064  continue;
9065  }
9066  }
9067 
9068  Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9069  }
9070 
9071  switch (BuiltinID) {
9072  default: break;
9073 
9074  case NEON::BI__builtin_neon_vget_lane_i8:
9075  case NEON::BI__builtin_neon_vget_lane_i16:
9076  case NEON::BI__builtin_neon_vget_lane_i32:
9077  case NEON::BI__builtin_neon_vget_lane_i64:
9078  case NEON::BI__builtin_neon_vget_lane_bf16:
9079  case NEON::BI__builtin_neon_vget_lane_f32:
9080  case NEON::BI__builtin_neon_vgetq_lane_i8:
9081  case NEON::BI__builtin_neon_vgetq_lane_i16:
9082  case NEON::BI__builtin_neon_vgetq_lane_i32:
9083  case NEON::BI__builtin_neon_vgetq_lane_i64:
9084  case NEON::BI__builtin_neon_vgetq_lane_bf16:
9085  case NEON::BI__builtin_neon_vgetq_lane_f32:
9086  case NEON::BI__builtin_neon_vduph_lane_bf16:
9087  case NEON::BI__builtin_neon_vduph_laneq_bf16:
9088  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9089 
9090  case NEON::BI__builtin_neon_vrndns_f32: {
9091  Value *Arg = EmitScalarExpr(E->getArg(0));
9092  llvm::Type *Tys[] = {Arg->getType()};
9093  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9094  return Builder.CreateCall(F, {Arg}, "vrndn"); }
9095 
9096  case NEON::BI__builtin_neon_vset_lane_i8:
9097  case NEON::BI__builtin_neon_vset_lane_i16:
9098  case NEON::BI__builtin_neon_vset_lane_i32:
9099  case NEON::BI__builtin_neon_vset_lane_i64:
9100  case NEON::BI__builtin_neon_vset_lane_bf16:
9101  case NEON::BI__builtin_neon_vset_lane_f32:
9102  case NEON::BI__builtin_neon_vsetq_lane_i8:
9103  case NEON::BI__builtin_neon_vsetq_lane_i16:
9104  case NEON::BI__builtin_neon_vsetq_lane_i32:
9105  case NEON::BI__builtin_neon_vsetq_lane_i64:
9106  case NEON::BI__builtin_neon_vsetq_lane_bf16:
9107  case NEON::BI__builtin_neon_vsetq_lane_f32:
9108  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9109 
9110  case NEON::BI__builtin_neon_vsha1h_u32:
9111  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9112  "vsha1h");
9113  case NEON::BI__builtin_neon_vsha1cq_u32:
9114  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9115  "vsha1h");
9116  case NEON::BI__builtin_neon_vsha1pq_u32:
9117  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9118  "vsha1h");
9119  case NEON::BI__builtin_neon_vsha1mq_u32:
9120  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9121  "vsha1h");
9122 
9123  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9124  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9125  "vcvtbfp2bf");
9126  }
9127 
9128  // The ARM _MoveToCoprocessor builtins put the input register value as
9129  // the first argument, but the LLVM intrinsic expects it as the third one.
9130  case clang::ARM::BI_MoveToCoprocessor:
9131  case clang::ARM::BI_MoveToCoprocessor2: {
9132  Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9133  ? Intrinsic::arm_mcr
9134  : Intrinsic::arm_mcr2);
9135  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9136  Ops[3], Ops[4], Ops[5]});
9137  }
9138  }
9139 
9140  // Get the last argument, which specifies the vector type.
9141  assert(HasExtraArg);
9142  const Expr *Arg = E->getArg(E->getNumArgs()-1);
9143  std::optional<llvm::APSInt> Result =
9144  Arg->getIntegerConstantExpr(getContext());
9145  if (!Result)
9146  return nullptr;
9147 
9148  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9149  BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9150  // Determine the overloaded type of this builtin.
9151  llvm::Type *Ty;
9152  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9153  Ty = FloatTy;
9154  else
9155  Ty = DoubleTy;
9156 
9157  // Determine whether this is an unsigned conversion or not.
9158  bool usgn = Result->getZExtValue() == 1;
9159  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9160 
9161  // Call the appropriate intrinsic.
9162  Function *F = CGM.getIntrinsic(Int, Ty);
9163  return Builder.CreateCall(F, Ops, "vcvtr");
9164  }
9165 
9166  // Determine the type of this overloaded NEON intrinsic.
9167  NeonTypeFlags Type = Result->getZExtValue();
9168  bool usgn = Type.isUnsigned();
9169  bool rightShift = false;
9170 
9171  llvm::FixedVectorType *VTy =
9172  GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9173  getTarget().hasBFloat16Type());
9174  llvm::Type *Ty = VTy;
9175  if (!Ty)
9176  return nullptr;
9177 
9178  // Many NEON builtins have identical semantics and uses in ARM and
9179  // AArch64. Emit these in a single function.
9180  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9181  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9182  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9183  if (Builtin)
9184  return EmitCommonNeonBuiltinExpr(
9185  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9186  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9187 
9188  unsigned Int;
9189  switch (BuiltinID) {
9190  default: return nullptr;
9191  case NEON::BI__builtin_neon_vld1q_lane_v:
9192  // Handle 64-bit integer elements as a special case. Use shuffles of
9193  // one-element vectors to avoid poor code for i64 in the backend.
9194  if (VTy->getElementType()->isIntegerTy(64)) {
9195  // Extract the other lane.
9196  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9197  int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9198  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9199  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9200  // Load the value as a one-element vector.
9201  Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9202  llvm::Type *Tys[] = {Ty, Int8PtrTy};
9203  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9204  Value *Align = getAlignmentValue32(PtrOp0);
9205  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9206  // Combine them.
9207  int Indices[] = {1 - Lane, Lane};
9208  return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9209  }
9210  [[fallthrough]];
9211  case NEON::BI__builtin_neon_vld1_lane_v: {
9212  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9213  PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9214  Value *Ld = Builder.CreateLoad(PtrOp0);
9215  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9216  }
9217  case NEON::BI__builtin_neon_vqrshrn_n_v:
9218  Int =
9219  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9220  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9221  1, true);
9222  case NEON::BI__builtin_neon_vqrshrun_n_v:
9223  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9224  Ops, "vqrshrun_n", 1, true);
9225  case NEON::BI__builtin_neon_vqshrn_n_v:
9226  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9227  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9228  1, true);
9229  case NEON::BI__builtin_neon_vqshrun_n_v:
9230  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9231  Ops, "vqshrun_n", 1, true);
9232  case NEON::BI__builtin_neon_vrecpe_v:
9233  case NEON::BI__builtin_neon_vrecpeq_v:
9234  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9235  Ops, "vrecpe");
9236  case NEON::BI__builtin_neon_vrshrn_n_v:
9237  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9238  Ops, "vrshrn_n", 1, true);
9239  case NEON::BI__builtin_neon_vrsra_n_v:
9240  case NEON::BI__builtin_neon_vrsraq_n_v:
9241  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9242  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9243  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9244  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9245  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9246  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9247  case NEON::BI__builtin_neon_vsri_n_v:
9248  case NEON::BI__builtin_neon_vsriq_n_v:
9249  rightShift = true;
9250  [[fallthrough]];
9251  case NEON::BI__builtin_neon_vsli_n_v:
9252  case NEON::BI__builtin_neon_vsliq_n_v:
9253  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9254  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9255  Ops, "vsli_n");
9256  case NEON::BI__builtin_neon_vsra_n_v:
9257  case NEON::BI__builtin_neon_vsraq_n_v:
9258  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9259  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9260  return Builder.CreateAdd(Ops[0], Ops[1]);
9261  case NEON::BI__builtin_neon_vst1q_lane_v:
9262  // Handle 64-bit integer elements as a special case. Use a shuffle to get
9263  // a one-element vector and avoid poor code for i64 in the backend.
9264  if (VTy->getElementType()->isIntegerTy(64)) {
9265  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9266  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9267  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9268  Ops[2] = getAlignmentValue32(PtrOp0);
9269  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9270  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9271  Tys), Ops);
9272  }
9273  [[fallthrough]];
9274  case NEON::BI__builtin_neon_vst1_lane_v: {
9275  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9276  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9277  return Builder.CreateStore(Ops[1],
9278  PtrOp0.withElementType(Ops[1]->getType()));
9279  }
9280  case NEON::BI__builtin_neon_vtbl1_v:
9281  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9282  Ops, "vtbl1");
9283  case NEON::BI__builtin_neon_vtbl2_v:
9284  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9285  Ops, "vtbl2");
9286  case NEON::BI__builtin_neon_vtbl3_v:
9287  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9288  Ops, "vtbl3");
9289  case NEON::BI__builtin_neon_vtbl4_v:
9290  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9291  Ops, "vtbl4");
9292  case NEON::BI__builtin_neon_vtbx1_v:
9293  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9294  Ops, "vtbx1");
9295  case NEON::BI__builtin_neon_vtbx2_v:
9296  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9297  Ops, "vtbx2");
9298  case NEON::BI__builtin_neon_vtbx3_v:
9299  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9300  Ops, "vtbx3");
9301  case NEON::BI__builtin_neon_vtbx4_v:
9302  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9303  Ops, "vtbx4");
9304  }
9305 }
9306 
9307 template<typename Integer>
9308 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9309  return E->getIntegerConstantExpr(Context)->getExtValue();
9310 }
9311 
9312 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9313  llvm::Type *T, bool Unsigned) {
9314  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9315  // which finds it convenient to specify signed/unsigned as a boolean flag.
9316  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9317 }
9318 
9319 static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9320  uint32_t Shift, bool Unsigned) {
9321  // MVE helper function for integer shift right. This must handle signed vs
9322  // unsigned, and also deal specially with the case where the shift count is
9323  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9324  // undefined behavior, but in MVE it's legal, so we must convert it to code
9325  // that is not undefined in IR.
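  // For example, with 16-bit lanes and a shift count of 16: an unsigned shift
  // folds to an all-zero vector, while a signed shift is emitted as an AShr
  // by 15, which fills every lane with copies of its sign bit.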
9326  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9327  ->getElementType()
9328  ->getPrimitiveSizeInBits();
9329  if (Shift == LaneBits) {
9330  // An unsigned shift of the full lane size always generates zero, so we can
9331  // simply emit a zero vector. A signed shift of the full lane size does the
9332  // same thing as shifting by one bit fewer.
9333  if (Unsigned)
9334  return llvm::Constant::getNullValue(V->getType());
9335  else
9336  --Shift;
9337  }
9338  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9339 }
9340 
9341 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9342  // MVE-specific helper function for a vector splat, which infers the element
9343  // count of the output vector by knowing that MVE vectors are all 128 bits
9344  // wide.
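  // For example, an i16 scalar produces 128 / 16 = 8 elements, i.e. <8 x i16>.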
9345  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9346  return Builder.CreateVectorSplat(Elements, V);
9347 }
9348 
9349 static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9350  CodeGenFunction *CGF,
9351  llvm::Value *V,
9352  llvm::Type *DestType) {
9353  // Convert one MVE vector type into another by reinterpreting its in-register
9354  // format.
9355  //
9356  // On little-endian targets this is identical to a bitcast (which
9357  // reinterprets the memory format). On big-endian targets the two are not
9357  // necessarily the same, because
9358  // the register and memory formats map to each other differently depending on
9359  // the lane size.
9360  //
9361  // We generate a bitcast whenever we can (if we're little-endian, or if the
9362  // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9363  // that performs the different kind of reinterpretation.
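  // For example, reinterpreting i16 lanes as i32 lanes on a big-endian target
  // goes through the arm.mve.vreinterpretq intrinsic, whereas an i32 -> f32
  // reinterpret (same lane size) is still a plain bitcast.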
9364  if (CGF->getTarget().isBigEndian() &&
9365  V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9366  return Builder.CreateCall(
9367  CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9368  {DestType, V->getType()}),
9369  V);
9370  } else {
9371  return Builder.CreateBitCast(V, DestType);
9372  }
9373 }
9374 
9375 static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9376  // Make a shufflevector that extracts every other element of a vector (evens
9377  // or odds, as desired).
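  // For example, with an 8-element input, Odd=false selects indices
  // {0, 2, 4, 6} and Odd=true selects {1, 3, 5, 7}.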
9378  SmallVector<int, 16> Indices;
9379  unsigned InputElements =
9380  cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9381  for (unsigned i = 0; i < InputElements; i += 2)
9382  Indices.push_back(i + Odd);
9383  return Builder.CreateShuffleVector(V, Indices);
9384 }
9385 
9386 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9387  llvm::Value *V1) {
9388  // Make a shufflevector that interleaves two vectors element by element.
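  // For example, two 4-element inputs produce the shuffle mask
  // {0, 4, 1, 5, 2, 6, 3, 7}.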
9389  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9390  SmallVector<int, 16> Indices;
9391  unsigned InputElements =
9392  cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9393  for (unsigned i = 0; i < InputElements; i++) {
9394  Indices.push_back(i);
9395  Indices.push_back(i + InputElements);
9396  }
9397  return Builder.CreateShuffleVector(V0, V1, Indices);
9398 }
9399 
9400 template<unsigned HighBit, unsigned OtherBits>
9401 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9402  // MVE-specific helper function to make a vector splat of a constant such as
9403  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
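  // For example, with 16-bit lanes, <1, 0> splats 0x8000 (INT16_MIN) and
  // <0, 1> splats 0x7fff (INT16_MAX).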
9404  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9405  unsigned LaneBits = T->getPrimitiveSizeInBits();
9406  uint32_t Value = HighBit << (LaneBits - 1);
9407  if (OtherBits)
9408  Value |= (1UL << (LaneBits - 1)) - 1;
9409  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9410  return ARMMVEVectorSplat(Builder, Lane);
9411 }
9412 
9413 static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9414  llvm::Value *V,
9415  unsigned ReverseWidth) {
9416  // MVE-specific helper function which reverses the elements of a
9417  // vector within every (ReverseWidth)-bit collection of lanes.
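  // For example, with i8 lanes and ReverseWidth == 32, Mask is 3 and the
  // indices become {3, 2, 1, 0, 7, 6, 5, 4, ...}, i.e. a VREV32.8 pattern.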
9418  SmallVector<int, 16> Indices;
9419  unsigned LaneSize = V->getType()->getScalarSizeInBits();
9420  unsigned Elements = 128 / LaneSize;
9421  unsigned Mask = ReverseWidth / LaneSize - 1;
9422  for (unsigned i = 0; i < Elements; i++)
9423  Indices.push_back(i ^ Mask);
9424  return Builder.CreateShuffleVector(V, Indices);
9425 }
9426 
9427 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9428  const CallExpr *E,
9429  ReturnValueSlot ReturnValue,
9430  llvm::Triple::ArchType Arch) {
9431  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9432  Intrinsic::ID IRIntr;
9433  unsigned NumVectors;
9434 
9435  // Code autogenerated by Tablegen will handle all the simple builtins.
9436  switch (BuiltinID) {
9437  #include "clang/Basic/arm_mve_builtin_cg.inc"
9438 
9439  // If we didn't match an MVE builtin id at all, go back to the
9440  // main EmitARMBuiltinExpr.
9441  default:
9442  return nullptr;
9443  }
9444 
9445  // Anything that breaks from that switch is an MVE builtin that
9446  // needs handwritten code to generate.
9447 
9448  switch (CustomCodeGenType) {
9449 
9450  case CustomCodeGen::VLD24: {
9451  llvm::SmallVector<Value *, 4> Ops;
9452  llvm::SmallVector<llvm::Type *, 2> Tys;
9453 
9454  auto MvecCType = E->getType();
9455  auto MvecLType = ConvertType(MvecCType);
9456  assert(MvecLType->isStructTy() &&
9457  "Return type for vld[24]q should be a struct");
9458  assert(MvecLType->getStructNumElements() == 1 &&
9459  "Return-type struct for vld[24]q should have one element");
9460  auto MvecLTypeInner = MvecLType->getStructElementType(0);
9461  assert(MvecLTypeInner->isArrayTy() &&
9462  "Return-type struct for vld[24]q should contain an array");
9463  assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9464  "Array member of return-type struct vld[24]q has wrong length");
9465  auto VecLType = MvecLTypeInner->getArrayElementType();
9466 
9467  Tys.push_back(VecLType);
9468 
9469  auto Addr = E->getArg(0);
9470  Ops.push_back(EmitScalarExpr(Addr));
9471  Tys.push_back(ConvertType(Addr->getType()));
9472 
9473  Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9474  Value *LoadResult = Builder.CreateCall(F, Ops);
9475  Value *MvecOut = PoisonValue::get(MvecLType);
9476  for (unsigned i = 0; i < NumVectors; ++i) {
9477  Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9478  MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9479  }
9480 
9481  if (ReturnValue.isNull())
9482  return MvecOut;
9483  else
9484  return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9485  }
9486 
9487  case CustomCodeGen::VST24: {
9488  llvm::SmallVector<Value *, 4> Ops;
9489  llvm::SmallVector<llvm::Type *, 2> Tys;
9490 
9491  auto Addr = E->getArg(0);
9492  Ops.push_back(EmitScalarExpr(Addr));
9493  Tys.push_back(ConvertType(Addr->getType()));
9494 
9495  auto MvecCType = E->getArg(1)->getType();
9496  auto MvecLType = ConvertType(MvecCType);
9497  assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9498  assert(MvecLType->getStructNumElements() == 1 &&
9499  "Data-type struct for vst2q should have one element");
9500  auto MvecLTypeInner = MvecLType->getStructElementType(0);
9501  assert(MvecLTypeInner->isArrayTy() &&
9502  "Data-type struct for vst2q should contain an array");
9503  assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9504  "Array member of data-type struct for vst[24]q has wrong length");
9505  auto VecLType = MvecLTypeInner->getArrayElementType();
9506 
9507  Tys.push_back(VecLType);
9508 
9509  AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9510  EmitAggExpr(E->getArg(1), MvecSlot);
9511  auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9512  for (unsigned i = 0; i < NumVectors; i++)
9513  Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9514 
9515  Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9516  Value *ToReturn = nullptr;
9517  for (unsigned i = 0; i < NumVectors; i++) {
9518  Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9519  ToReturn = Builder.CreateCall(F, Ops);
9520  Ops.pop_back();
9521  }
9522  return ToReturn;
9523  }
9524  }
9525  llvm_unreachable("unknown custom codegen type.");
9526 }
9527 
9528 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9529  const CallExpr *E,
9530  ReturnValueSlot ReturnValue,
9531  llvm::Triple::ArchType Arch) {
9532  switch (BuiltinID) {
9533  default:
9534  return nullptr;
9535 #include "clang/Basic/arm_cde_builtin_cg.inc"
9536  }
9537 }
9538 
9539 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9540  const CallExpr *E,
9541  SmallVectorImpl<Value *> &Ops,
9542  llvm::Triple::ArchType Arch) {
9543  unsigned int Int = 0;
9544  const char *s = nullptr;
9545 
9546  switch (BuiltinID) {
9547  default:
9548  return nullptr;
9549  case NEON::BI__builtin_neon_vtbl1_v:
9550  case NEON::BI__builtin_neon_vqtbl1_v:
9551  case NEON::BI__builtin_neon_vqtbl1q_v:
9552  case NEON::BI__builtin_neon_vtbl2_v:
9553  case NEON::BI__builtin_neon_vqtbl2_v:
9554  case NEON::BI__builtin_neon_vqtbl2q_v:
9555  case NEON::BI__builtin_neon_vtbl3_v:
9556  case NEON::BI__builtin_neon_vqtbl3_v:
9557  case NEON::BI__builtin_neon_vqtbl3q_v:
9558  case NEON::BI__builtin_neon_vtbl4_v:
9559  case NEON::BI__builtin_neon_vqtbl4_v:
9560  case NEON::BI__builtin_neon_vqtbl4q_v:
9561  break;
9562  case NEON::BI__builtin_neon_vtbx1_v:
9563  case NEON::BI__builtin_neon_vqtbx1_v:
9564  case NEON::BI__builtin_neon_vqtbx1q_v:
9565  case NEON::BI__builtin_neon_vtbx2_v:
9566  case NEON::BI__builtin_neon_vqtbx2_v:
9567  case NEON::BI__builtin_neon_vqtbx2q_v:
9568  case NEON::BI__builtin_neon_vtbx3_v:
9569  case NEON::BI__builtin_neon_vqtbx3_v:
9570  case NEON::BI__builtin_neon_vqtbx3q_v:
9571  case NEON::BI__builtin_neon_vtbx4_v:
9572  case NEON::BI__builtin_neon_vqtbx4_v:
9573  case NEON::BI__builtin_neon_vqtbx4q_v:
9574  break;
9575  }
9576 
9577  assert(E->getNumArgs() >= 3);
9578 
9579  // Get the last argument, which specifies the vector type.
9580  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9581  std::optional<llvm::APSInt> Result =
9582  Arg->getIntegerConstantExpr(CGF.getContext());
9583  if (!Result)
9584  return nullptr;
9585 
9586  // Determine the type of this overloaded NEON intrinsic.
9587  NeonTypeFlags Type = Result->getZExtValue();
9588  llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9589  if (!Ty)
9590  return nullptr;
9591 
9592  CodeGen::CGBuilderTy &Builder = CGF.Builder;
9593 
9594  // AArch64 scalar builtins are not overloaded; they lack an extra argument
9595  // specifying the vector type, so each case is handled individually.
9596  switch (BuiltinID) {
9597  case NEON::BI__builtin_neon_vtbl1_v: {
9598  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9599  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9600  }
9601  case NEON::BI__builtin_neon_vtbl2_v: {
9602  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9603  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9604  }
9605  case NEON::BI__builtin_neon_vtbl3_v: {
9606  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9607  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9608  }
9609  case NEON::BI__builtin_neon_vtbl4_v: {
9610  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9611  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9612  }
9613  case NEON::BI__builtin_neon_vtbx1_v: {
9614  Value *TblRes =
9615  packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9616  Intrinsic::aarch64_neon_tbl1, "vtbl1");
9617 
9618  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9619  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9620  CmpRes = Builder.CreateSExt(CmpRes, Ty);
9621 
9622  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9623  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9624  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9625  }
9626  case NEON::BI__builtin_neon_vtbx2_v: {
9627  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9628  Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9629  }
9630  case NEON::BI__builtin_neon_vtbx3_v: {
9631  Value *TblRes =
9632  packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9633  Intrinsic::aarch64_neon_tbl2, "vtbl2");
9634 
9635  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9636  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9637  TwentyFourV);
9638  CmpRes = Builder.CreateSExt(CmpRes, Ty);
9639 
9640  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9641  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9642  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9643  }
9644  case NEON::BI__builtin_neon_vtbx4_v: {
9645  return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9646  Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9647  }
9648  case NEON::BI__builtin_neon_vqtbl1_v:
9649  case NEON::BI__builtin_neon_vqtbl1q_v:
9650  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9651  case NEON::BI__builtin_neon_vqtbl2_v:
9652  case NEON::BI__builtin_neon_vqtbl2q_v:
9653  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9654  case NEON::BI__builtin_neon_vqtbl3_v:
9655  case NEON::BI__builtin_neon_vqtbl3q_v:
9656  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9657  case NEON::BI__builtin_neon_vqtbl4_v:
9658  case NEON::BI__builtin_neon_vqtbl4q_v:
9659  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9660  case NEON::BI__builtin_neon_vqtbx1_v:
9661  case NEON::BI__builtin_neon_vqtbx1q_v:
9662  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9663  case NEON::BI__builtin_neon_vqtbx2_v:
9664  case NEON::BI__builtin_neon_vqtbx2q_v:
9665  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9666  case NEON::BI__builtin_neon_vqtbx3_v:
9667  case NEON::BI__builtin_neon_vqtbx3q_v:
9668  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9669  case NEON::BI__builtin_neon_vqtbx4_v:
9670  case NEON::BI__builtin_neon_vqtbx4q_v:
9671  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9673  }
9674 
9675  if (!Int)
9676  return nullptr;
9677 
9678  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9679  return CGF.EmitNeonCall(F, Ops, s);
9680 }
9681 
9682 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9683  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9684  Op = Builder.CreateBitCast(Op, Int16Ty);
9685  Value *V = PoisonValue::get(VTy);
9686  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9687  Op = Builder.CreateInsertElement(V, Op, CI);
9688  return Op;
9689 }
9690 
9691 /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9692 /// access builtin. Only required if it can't be inferred from the base pointer
9693 /// operand.
9694 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9695  switch (TypeFlags.getMemEltType()) {
9696  case SVETypeFlags::MemEltTyDefault:
9697  return getEltType(TypeFlags);
9698  case SVETypeFlags::MemEltTyInt8:
9699  return Builder.getInt8Ty();
9700  case SVETypeFlags::MemEltTyInt16:
9701  return Builder.getInt16Ty();
9702  case SVETypeFlags::MemEltTyInt32:
9703  return Builder.getInt32Ty();
9704  case SVETypeFlags::MemEltTyInt64:
9705  return Builder.getInt64Ty();
9706  }
9707  llvm_unreachable("Unknown MemEltType");
9708 }
9709 
9710 llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9711  switch (TypeFlags.getEltType()) {
9712  default:
9713  llvm_unreachable("Invalid SVETypeFlag!");
9714 
9715  case SVETypeFlags::EltTyInt8:
9716  return Builder.getInt8Ty();
9717  case SVETypeFlags::EltTyInt16:
9718  return Builder.getInt16Ty();
9719  case SVETypeFlags::EltTyInt32:
9720  return Builder.getInt32Ty();
9721  case SVETypeFlags::EltTyInt64:
9722  return Builder.getInt64Ty();
9723  case SVETypeFlags::EltTyInt128:
9724  return Builder.getInt128Ty();
9725 
9726  case SVETypeFlags::EltTyFloat16:
9727  return Builder.getHalfTy();
9728  case SVETypeFlags::EltTyFloat32:
9729  return Builder.getFloatTy();
9730  case SVETypeFlags::EltTyFloat64:
9731  return Builder.getDoubleTy();
9732 
9733  case SVETypeFlags::EltTyBFloat16:
9734  return Builder.getBFloatTy();
9735 
9736  case SVETypeFlags::EltTyBool8:
9737  case SVETypeFlags::EltTyBool16:
9738  case SVETypeFlags::EltTyBool32:
9739  case SVETypeFlags::EltTyBool64:
9740  return Builder.getInt1Ty();
9741  }
9742 }
9743 
9744 // Return the llvm predicate vector type corresponding to the specified element
9745 // TypeFlags.
9746 llvm::ScalableVectorType *
9747 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9748  switch (TypeFlags.getEltType()) {
9749  default: llvm_unreachable("Unhandled SVETypeFlag!");
9750 
9751  case SVETypeFlags::EltTyInt8:
9752  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9753  case SVETypeFlags::EltTyInt16:
9754  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9755  case SVETypeFlags::EltTyInt32:
9756  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9757  case SVETypeFlags::EltTyInt64:
9758  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9759 
9760  case SVETypeFlags::EltTyBFloat16:
9761  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9762  case SVETypeFlags::EltTyFloat16:
9763  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9764  case SVETypeFlags::EltTyFloat32:
9765  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9766  case SVETypeFlags::EltTyFloat64:
9767  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9768 
9769  case SVETypeFlags::EltTyBool8:
9770  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9771  case SVETypeFlags::EltTyBool16:
9772  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9773  case SVETypeFlags::EltTyBool32:
9774  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9775  case SVETypeFlags::EltTyBool64:
9776  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9777  }
9778 }
9779 
9780 // Return the llvm vector type corresponding to the specified element TypeFlags.
9781 llvm::ScalableVectorType *
9782 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9783  switch (TypeFlags.getEltType()) {
9784  default:
9785  llvm_unreachable("Invalid SVETypeFlag!");
9786 
9787  case SVETypeFlags::EltTyInt8:
9788  return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9789  case SVETypeFlags::EltTyInt16:
9790  return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9791  case SVETypeFlags::EltTyInt32:
9792  return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9793  case SVETypeFlags::EltTyInt64:
9794  return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9795 
9796  case SVETypeFlags::EltTyFloat16:
9797  return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9798  case SVETypeFlags::EltTyBFloat16:
9799  return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9800  case SVETypeFlags::EltTyFloat32:
9801  return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9802  case SVETypeFlags::EltTyFloat64:
9803  return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9804 
9805  case SVETypeFlags::EltTyBool8:
9806  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9807  case SVETypeFlags::EltTyBool16:
9808  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9809  case SVETypeFlags::EltTyBool32:
9810  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9811  case SVETypeFlags::EltTyBool64:
9812  return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9813  }
9814 }
9815 
9816 llvm::Value *
9817 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9818  Function *Ptrue =
9819  CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9820  return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9821 }
9822 
9823 constexpr unsigned SVEBitsPerBlock = 128;
9824 
9825 static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9826  unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
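  // For example, EltTy == i32 gives 128 / 32 = 4, i.e. <vscale x 4 x i32>.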
9827  return llvm::ScalableVectorType::get(EltTy, NumElts);
9828 }
9829 
9830 // Reinterpret the input predicate so that it can be used to correctly isolate
9831 // the elements of the specified datatype.
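// For example, an ACLE svbool_t arrives as <vscale x 16 x i1>; to predicate
// <vscale x 2 x i64> data it is narrowed to <vscale x 2 x i1> via the
// aarch64.sve.convert.from.svbool intrinsic.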
9832 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9833  llvm::ScalableVectorType *VTy) {
9834 
9835  if (isa<TargetExtType>(Pred->getType()) &&
9836  cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9837  return Pred;
9838 
9839  auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9840  if (Pred->getType() == RTy)
9841  return Pred;
9842 
9843  unsigned IntID;
9844  llvm::Type *IntrinsicTy;
9845  switch (VTy->getMinNumElements()) {
9846  default:
9847  llvm_unreachable("unsupported element count!");
9848  case 1:
9849  case 2:
9850  case 4:
9851  case 8:
9852  IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9853  IntrinsicTy = RTy;
9854  break;
9855  case 16:
9856  IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9857  IntrinsicTy = Pred->getType();
9858  break;
9859  }
9860 
9861  Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9862  Value *C = Builder.CreateCall(F, Pred);
9863  assert(C->getType() == RTy && "Unexpected return type!");
9864  return C;
9865 }
9866 
9867 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9868  SmallVectorImpl<Value *> &Ops,
9869  unsigned IntID) {
9870  auto *ResultTy = getSVEType(TypeFlags);
9871  auto *OverloadedTy =
9872  llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9873 
9874  Function *F = nullptr;
9875  if (Ops[1]->getType()->isVectorTy())
9876  // This is the "vector base, scalar offset" case. In order to uniquely
9877  // map this built-in to an LLVM IR intrinsic, we need both the return type
9878  // and the type of the vector base.
9879  F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9880  else
9881  // This is the "scalar base, vector offset" case. The type of the offset
9882  // is encoded in the name of the intrinsic. We only need to specify the
9883  // return type in order to uniquely map this built-in to an LLVM IR
9884  // intrinsic.
9885  F = CGM.getIntrinsic(IntID, OverloadedTy);
9886 
9887  // At the ACLE level there's only one predicate type, svbool_t, which is
9888  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9889  // actual type being loaded. For example, when loading doubles (i64) the
9890  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9891  // the predicate and the data being loaded must match. Cast to the type
9892  // expected by the intrinsic. The intrinsic itself should be defined in
9893  // a way that enforces relations between parameter types.
9894  Ops[0] = EmitSVEPredicateCast(
9895  Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9896 
9897  // Pass 0 when the offset is missing. This can only be applied when using
9898  // the "vector base" addressing mode for which ACLE allows no offset. The
9899  // corresponding LLVM IR always requires an offset.
9900  if (Ops.size() == 2) {
9901  assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9902  Ops.push_back(ConstantInt::get(Int64Ty, 0));
9903  }
9904 
9905  // For "vector base, scalar index" scale the index so that it becomes a
9906  // scalar offset.
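  // For example, with 64-bit elements BytesPerElt is 8, so the index is
  // shifted left by 3 to become a byte offset.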
9907  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9908  unsigned BytesPerElt =
9909  OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9910  Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9911  }
9912 
9913  Value *Call = Builder.CreateCall(F, Ops);
9914 
9915  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9916  // other cases it's folded into a nop.
9917  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9918  : Builder.CreateSExt(Call, ResultTy);
9919 }
9920 
9921 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9922  SmallVectorImpl<Value *> &Ops,
9923  unsigned IntID) {
9924  auto *SrcDataTy = getSVEType(TypeFlags);
9925  auto *OverloadedTy =
9926  llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9927 
9928  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9929  // it's the first argument. Move it accordingly.
9930  Ops.insert(Ops.begin(), Ops.pop_back_val());
9931 
9932  Function *F = nullptr;
9933  if (Ops[2]->getType()->isVectorTy())
9934  // This is the "vector base, scalar offset" case. In order to uniquely
9935  // map this built-in to an LLVM IR intrinsic, we need both the return type
9936  // and the type of the vector base.
9937  F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9938  else
9939  // This is the "scalar base, vector offset" case. The type of the offset
9940  // is encoded in the name of the intrinsic. We only need to specify the
9941  // return type in order to uniquely map this built-in to an LLVM IR
9942  // intrinsic.
9943  F = CGM.getIntrinsic(IntID, OverloadedTy);
9944 
9945  // Pass 0 when the offset is missing. This can only be applied when using
9946  // the "vector base" addressing mode for which ACLE allows no offset. The
9947  // corresponding LLVM IR always requires an offset.
9948  if (Ops.size() == 3) {
9949  assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9950  Ops.push_back(ConstantInt::get(Int64Ty, 0));
9951  }
9952 
9953  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9954  // folded into a nop.
9955  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9956 
9957  // At the ACLE level there's only one predicate type, svbool_t, which is
9958  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9959  // actual type being stored. For example, when storing doubles (i64) the
9960  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9961  // the predicate and the data being stored must match. Cast to the type
9962  // expected by the intrinsic. The intrinsic itself should be defined in
9963  // a way that enforces relations between parameter types.
9964  Ops[1] = EmitSVEPredicateCast(
9965  Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9966 
9967  // For "vector base, scalar index" scale the index so that it becomes a
9968  // scalar offset.
9969  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9970  unsigned BytesPerElt =
9971  OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9972  Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9973  }
9974 
9975  return Builder.CreateCall(F, Ops);
9976 }
9977 
9978 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9979  SmallVectorImpl<Value *> &Ops,
9980  unsigned IntID) {
9981  // The gather prefetches are overloaded on the vector input - this can either
9982  // be the vector of base addresses or vector of offsets.
9983  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9984  if (!OverloadedTy)
9985  OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9986 
9987  // Cast the predicate from svbool_t to the right number of elements.
9988  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9989 
9990  // vector + imm addressing modes
9991  if (Ops[1]->getType()->isVectorTy()) {
9992  if (Ops.size() == 3) {
9993  // Pass 0 for 'vector+imm' when the index is omitted.
9994  Ops.push_back(ConstantInt::get(Int64Ty, 0));
9995 
9996  // The sv_prfop is the last operand in the builtin and IR intrinsic.
9997  std::swap(Ops[2], Ops[3]);
9998  } else {
9999  // Index needs to be passed as scaled offset.
10000  llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10001  unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10002  if (BytesPerElt > 1)
10003  Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10004  }
10005  }
10006 
10007  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10008  return Builder.CreateCall(F, Ops);
10009 }
10010 
10011 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10012  SmallVectorImpl<Value *> &Ops,
10013  unsigned IntID) {
10014  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10015 
10016  unsigned N;
10017  switch (IntID) {
10018  case Intrinsic::aarch64_sve_ld2_sret:
10019  case Intrinsic::aarch64_sve_ld1_pn_x2:
10020  case Intrinsic::aarch64_sve_ldnt1_pn_x2:
10021  case Intrinsic::aarch64_sve_ld2q_sret:
10022  N = 2;
10023  break;
10024  case Intrinsic::aarch64_sve_ld3_sret:
10025  case Intrinsic::aarch64_sve_ld3q_sret:
10026  N = 3;
10027  break;
10028  case Intrinsic::aarch64_sve_ld4_sret:
10029  case Intrinsic::aarch64_sve_ld1_pn_x4:
10030  case Intrinsic::aarch64_sve_ldnt1_pn_x4:
10031  case Intrinsic::aarch64_sve_ld4q_sret:
10032  N = 4;
10033  break;
10034  default:
10035  llvm_unreachable("unknown intrinsic!");
10036  }
10037  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10038  VTy->getElementCount() * N);
10039 
10040  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10041  Value *BasePtr = Ops[1];
10042 
10043  // Does the load have an offset?
10044  if (Ops.size() > 2)
10045  BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10046 
10047  Function *F = CGM.getIntrinsic(IntID, {VTy});
10048  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10049  unsigned MinElts = VTy->getMinNumElements();
10050  Value *Ret = llvm::PoisonValue::get(RetTy);
10051  for (unsigned I = 0; I < N; I++) {
10052  Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10053  Value *SRet = Builder.CreateExtractValue(Call, I);
10054  Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10055  }
10056  return Ret;
10057 }
10058 
10059 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10060  SmallVectorImpl<Value *> &Ops,
10061  unsigned IntID) {
10062  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10063 
10064  unsigned N;
10065  switch (IntID) {
10066  case Intrinsic::aarch64_sve_st2:
10067  case Intrinsic::aarch64_sve_st1_pn_x2:
10068  case Intrinsic::aarch64_sve_stnt1_pn_x2:
10069  case Intrinsic::aarch64_sve_st2q:
10070  N = 2;
10071  break;
10072  case Intrinsic::aarch64_sve_st3:
10073  case Intrinsic::aarch64_sve_st3q:
10074  N = 3;
10075  break;
10076  case Intrinsic::aarch64_sve_st4:
10077  case Intrinsic::aarch64_sve_st1_pn_x4:
10078  case Intrinsic::aarch64_sve_stnt1_pn_x4:
10079  case Intrinsic::aarch64_sve_st4q:
10080  N = 4;
10081  break;
10082  default:
10083  llvm_unreachable("unknown intrinsic!");
10084  }
10085 
10086  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10087  Value *BasePtr = Ops[1];
10088 
10089  // Does the store have an offset?
10090  if (Ops.size() > (2 + N))
10091  BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10092 
10093  // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10094  // need to break up the tuple vector.
10095  SmallVector<llvm::Value *, 5> Operands;
10096  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10097  Operands.push_back(Ops[I]);
10098  Operands.append({Predicate, BasePtr});
10099  Function *F = CGM.getIntrinsic(IntID, { VTy });
10100 
10101  return Builder.CreateCall(F, Operands);
10102 }
10103 
10104 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10105 // svpmullt_pair intrinsics, with the exception that their results are bitcast
10106 // to a wider type.
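// For example, svpmullb_u64 calls the pair intrinsic on its
// <vscale x 4 x i32> operands and reinterprets the <vscale x 4 x i32> result
// as the wider <vscale x 2 x i64>.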
10107 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10108  SmallVectorImpl<Value *> &Ops,
10109  unsigned BuiltinID) {
10110  // Splat scalar operand to vector (intrinsics with _n infix)
10111  if (TypeFlags.hasSplatOperand()) {
10112  unsigned OpNo = TypeFlags.getSplatOperand();
10113  Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10114  }
10115 
10116  // The pair-wise function has a narrower overloaded type.
10117  Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10118  Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10119 
10120  // Now bitcast to the wider result type.
10121  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10122  return EmitSVEReinterpret(Call, Ty);
10123 }
10124 
10125 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10126  ArrayRef<Value *> Ops, unsigned BuiltinID) {
10127  llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10128  Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10129  return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10130 }
10131 
10132 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10133  SmallVectorImpl<Value *> &Ops,
10134  unsigned BuiltinID) {
10135  auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10136  auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10137  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10138 
10139  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10140  Value *BasePtr = Ops[1];
10141 
10142  // If an index operand was supplied, fold it into the base pointer.
10143  if (Ops.size() > 3) {
10144  BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10145  }
10146 
10147  Value *PrfOp = Ops.back();
10148 
10149  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10150  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10151 }
10152 
10153 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10154  llvm::Type *ReturnTy,
10155  SmallVectorImpl<Value *> &Ops,
10156  unsigned IntrinsicID,
10157  bool IsZExtReturn) {
10158  QualType LangPTy = E->getArg(1)->getType();
10159  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10160  LangPTy->castAs<PointerType>()->getPointeeType());
10161 
10162  // The vector type that is returned may be different from the
10163  // eventual type loaded from memory.
10164  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10165  llvm::ScalableVectorType *MemoryTy = nullptr;
10166  llvm::ScalableVectorType *PredTy = nullptr;
10167  bool IsQuadLoad = false;
10168  switch (IntrinsicID) {
10169  case Intrinsic::aarch64_sve_ld1uwq:
10170  case Intrinsic::aarch64_sve_ld1udq:
10171  MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10172  PredTy = llvm::ScalableVectorType::get(
10173  llvm::Type::getInt1Ty(getLLVMContext()), 1);
10174  IsQuadLoad = true;
10175  break;
10176  default:
10177  MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10178  PredTy = MemoryTy;
10179  break;
10180  }
10181 
10182  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10183  Value *BasePtr = Ops[1];
10184 
10185  // Does the load have an offset?
10186  if (Ops.size() > 2)
10187  BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10188 
10189  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
10190  Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10191  auto *Load =
10192  cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10193  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10194  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10195 
10196  if (IsQuadLoad)
10197  return Load;
10198 
10199  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10200  : Builder.CreateSExt(Load, VectorTy);
10201 }
10202 
10203 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10204  SmallVectorImpl<Value *> &Ops,
10205  unsigned IntrinsicID) {
10206  QualType LangPTy = E->getArg(1)->getType();
10207  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10208  LangPTy->castAs<PointerType>()->getPointeeType());
10209 
10210  // The vector type that is stored may be different from the
10211  // eventual type stored to memory.
10212  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10213  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10214 
10215  auto PredTy = MemoryTy;
10216  auto AddrMemoryTy = MemoryTy;
10217  bool IsQuadStore = false;
10218 
10219  switch (IntrinsicID) {
10220  case Intrinsic::aarch64_sve_st1wq:
10221  case Intrinsic::aarch64_sve_st1dq:
10222  AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10223  PredTy =
10224  llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10225  IsQuadStore = true;
10226  break;
10227  default:
10228  break;
10229  }
10230  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10231  Value *BasePtr = Ops[1];
10232 
10233  // Does the store have an offset?
10234  if (Ops.size() == 4)
10235  BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10236 
10237  // Last value is always the data
10238  Value *Val =
10239  IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10240 
10241  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
10242  Function *F =
10243  CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10244  auto *Store =
10245  cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10246  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10247  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10248  return Store;
10249 }
10250 
10251 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10252  SmallVectorImpl<Value *> &Ops,
10253  unsigned IntID) {
10254  Ops[2] = EmitSVEPredicateCast(
10255  Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10256 
10257  SmallVector<Value *> NewOps;
10258  NewOps.push_back(Ops[2]);
10259 
10260  llvm::Value *BasePtr = Ops[3];
10261 
10262  // If the intrinsic contains the vnum parameter, multiply it with the vector
10263  // size in bytes.
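  // For example, with vnum == 2 and a 512-bit streaming vector length,
  // aarch64.sme.cntsb returns 64, so the base pointer advances by 128 bytes.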
10264  if (Ops.size() == 5) {
10265  Function *StreamingVectorLength =
10266  CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10267  llvm::Value *StreamingVectorLengthCall =
10268  Builder.CreateCall(StreamingVectorLength);
10269  llvm::Value *Mulvl =
10270  Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10271  // The type of the ptr parameter is void *, so use Int8Ty here.
10272  BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10273  }
10274  NewOps.push_back(BasePtr);
10275  NewOps.push_back(Ops[0]);
10276  NewOps.push_back(Ops[1]);
10277  Function *F = CGM.getIntrinsic(IntID);
10278  return Builder.CreateCall(F, NewOps);
10279 }
10280 
10281 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10282  SmallVectorImpl<Value *> &Ops,
10283  unsigned IntID) {
10284  auto *VecTy = getSVEType(TypeFlags);
10285  Function *F = CGM.getIntrinsic(IntID, VecTy);
10286  if (TypeFlags.isReadZA())
10287  Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10288  else if (TypeFlags.isWriteZA())
10289  Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10290  return Builder.CreateCall(F, Ops);
10291 }
10292 
10293 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10294  SmallVectorImpl<Value *> &Ops,
10295  unsigned IntID) {
10296  // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10297  if (Ops.size() == 0)
10298  Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10299  Function *F = CGM.getIntrinsic(IntID, {});
10300  return Builder.CreateCall(F, Ops);
10301 }
10302 
10303 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10304  SmallVectorImpl<Value *> &Ops,
10305  unsigned IntID) {
10306  if (Ops.size() == 2)
10307  Ops.push_back(Builder.getInt32(0));
10308  else
10309  Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10310  Function *F = CGM.getIntrinsic(IntID, {});
10311  return Builder.CreateCall(F, Ops);
10312 }
10313 
10314 // Splat a scalar across all lanes of an SVE vector, taking the element
10315 // count from the supplied vector type rather than from the scalar.
10316 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10317  return Builder.CreateVectorSplat(
10318  cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10319 }
10320 
10321 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10322  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10323 }
10324 
10326  // FIXME: For big endian this needs an additional REV, or needs a separate
10327  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10328  // instruction is defined as 'bitwise' equivalent from memory point of
10329  // view (when storing/reloading), whereas the svreinterpret builtin
10330  // implements bitwise equivalent cast from register point of view.
10331  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10332  return Builder.CreateBitCast(Val, Ty);
10333 }
10334 
10335 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10336  SmallVectorImpl<Value *> &Ops) {
10337  auto *SplatZero = Constant::getNullValue(Ty);
10338  Ops.insert(Ops.begin(), SplatZero);
10339 }
10340 
10341 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10342  SmallVectorImpl<Value *> &Ops) {
10343  auto *SplatUndef = UndefValue::get(Ty);
10344  Ops.insert(Ops.begin(), SplatUndef);
10345 }
10346 
10347 SmallVector<llvm::Type *, 2>
10348 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10349  llvm::Type *ResultType,
10350  ArrayRef<Value *> Ops) {
10351  if (TypeFlags.isOverloadNone())
10352  return {};
10353 
10354  llvm::Type *DefaultType = getSVEType(TypeFlags);
10355 
10356  if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10357  return {DefaultType, Ops[1]->getType()};
10358 
10359  if (TypeFlags.isOverloadWhileRW())
10360  return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10361 
10362  if (TypeFlags.isOverloadCvt())
10363  return {Ops[0]->getType(), Ops.back()->getType()};
10364 
10365  if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10366  ResultType->isVectorTy())
10367  return {ResultType, Ops[1]->getType()};
10368 
10369  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10370  return {DefaultType};
10371 }
10372 
10373 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10374  llvm::Type *Ty,
10375  ArrayRef<Value *> Ops) {
10376  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10377  "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10378 
10379  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10380  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10381  TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10382  Value *Idx = ConstantInt::get(CGM.Int64Ty,
10383  I * SingleVecTy->getMinNumElements());
10384 
10385  if (TypeFlags.isTupleSet())
10386  return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10387  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10388 }
10389 
10390 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10391  llvm::Type *Ty,
10392  ArrayRef<Value *> Ops) {
10393  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10394 
10395  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10396  unsigned MinElts = SrcTy->getMinNumElements();
10397  Value *Call = llvm::PoisonValue::get(Ty);
10398  for (unsigned I = 0; I < Ops.size(); I++) {
10399  Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10400  Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10401  }
10402 
10403  return Call;
10404 }
10405 
10406 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10407  // Multi-vector results are concatenated into a single (wide) result
10408  // vector.
10409  auto *StructTy = dyn_cast<StructType>(Call->getType());
10410  if (!StructTy)
10411  return Call;
10412 
10413  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10414  if (!VTy)
10415  return Call;
10416  unsigned N = StructTy->getNumElements();
10417 
10418  // We may need to emit a cast to an svbool_t.
10419  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10420  unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10421 
10422  ScalableVectorType *WideVTy =
10423  ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10424  Value *Ret = llvm::PoisonValue::get(WideVTy);
10425  for (unsigned I = 0; I < N; ++I) {
10426  Value *SRet = Builder.CreateExtractValue(Call, I);
10427  assert(SRet->getType() == VTy && "Unexpected type for result value");
10428  Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10429 
10430  if (IsPredTy)
10431  SRet = EmitSVEPredicateCast(
10432  SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10433 
10434  Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10435  }
10436  Call = Ret;
10437 
10438  return Call;
10439 }
10440 
10441 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10442  unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10443  SVETypeFlags TypeFlags) {
10444  // Find out if any arguments are required to be integer constant expressions.
10445  unsigned ICEArguments = 0;
10447  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10448  assert(Error == ASTContext::GE_None && "Should not codegen an error");
10449 
10450  // Tuple set/get only requires one insert/extract vector, which is
10451  // created by EmitSVETupleSetOrGet.
10452  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10453 
10454  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10455  bool IsICE = ICEArguments & (1 << i);
10456  Value *Arg = EmitScalarExpr(E->getArg(i));
10457 
10458  if (IsICE) {
10459  // If this is required to be a constant, constant fold it so that we know
10460  // that the generated intrinsic gets a ConstantInt.
10461  std::optional<llvm::APSInt> Result =
10462  E->getArg(i)->getIntegerConstantExpr(getContext());
10463  assert(Result && "Expected argument to be a constant");
10464 
10465  // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10466  // truncate because the immediate has been range checked and no valid
10467  // immediate requires more than a handful of bits.
10468  *Result = Result->extOrTrunc(32);
10469  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10470  continue;
10471  }
10472 
10473  if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10474  Ops.push_back(Arg);
10475  continue;
10476  }
10477 
10478  auto *VTy = cast<ScalableVectorType>(Arg->getType());
10479  unsigned MinElts = VTy->getMinNumElements();
10480  bool IsPred = VTy->getElementType()->isIntegerTy(1);
10481  unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10482 
10483  if (N == 1) {
10484  Ops.push_back(Arg);
10485  continue;
10486  }
10487 
10488  for (unsigned I = 0; I < N; ++I) {
10489  Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10490  auto *NewVTy =
10491  ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10492  Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10493  }
10494  }
10495 }
10496 
10497 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10498  const CallExpr *E) {
10499  llvm::Type *Ty = ConvertType(E->getType());
10500  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10501  BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10502  Value *Val = EmitScalarExpr(E->getArg(0));
10503  return EmitSVEReinterpret(Val, Ty);
10504  }
10505 
10506  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10507  AArch64SVEIntrinsicsProvenSorted);
10508 
10509  llvm::SmallVector<Value *, 4> Ops;
10510  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10511  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10512 
10513  if (TypeFlags.isLoad())
10514  return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10515  TypeFlags.isZExtReturn());
10516  else if (TypeFlags.isStore())
10517  return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10518  else if (TypeFlags.isGatherLoad())
10519  return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10520  else if (TypeFlags.isScatterStore())
10521  return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10522  else if (TypeFlags.isPrefetch())
10523  return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10524  else if (TypeFlags.isGatherPrefetch())
10525  return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10526  else if (TypeFlags.isStructLoad())
10527  return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10528  else if (TypeFlags.isStructStore())
10529  return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10530  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10531  return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10532  else if (TypeFlags.isTupleCreate())
10533  return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10534  else if (TypeFlags.isUndef())
10535  return UndefValue::get(Ty);
10536  else if (Builtin->LLVMIntrinsic != 0) {
10537  if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10538  InsertExplicitZeroOperand(Builder, Ty, Ops);
10539 
10540  if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10541  InsertExplicitUndefOperand(Builder, Ty, Ops);
10542 
10543  // Some ACLE builtins leave out the argument to specify the predicate
10544  // pattern, which is expected to be expanded to an SV_ALL pattern.
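  // For example, 31 is the SV_ALL pattern, corresponding to the "all" form
  // of ptrue.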
10545  if (TypeFlags.isAppendSVALL())
10546  Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10547  if (TypeFlags.isInsertOp1SVALL())
10548  Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10549 
10550  // Predicates must match the main datatype.
10551  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10552  if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10553  if (PredTy->getElementType()->isIntegerTy(1))
10554  Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10555 
10556  // Splat scalar operand to vector (intrinsics with _n infix)
10557  if (TypeFlags.hasSplatOperand()) {
10558  unsigned OpNo = TypeFlags.getSplatOperand();
10559  Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10560  }
10561 
10562  if (TypeFlags.isReverseCompare())
10563  std::swap(Ops[1], Ops[2]);
10564  else if (TypeFlags.isReverseUSDOT())
10565  std::swap(Ops[1], Ops[2]);
10566  else if (TypeFlags.isReverseMergeAnyBinOp() &&
10567  TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10568  std::swap(Ops[1], Ops[2]);
10569  else if (TypeFlags.isReverseMergeAnyAccOp() &&
10570  TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10571  std::swap(Ops[1], Ops[3]);
10572 
10573  // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10574  if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10575  llvm::Type *OpndTy = Ops[1]->getType();
10576  auto *SplatZero = Constant::getNullValue(OpndTy);
10577  Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10578  }
10579 
10580  Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10581  getSVEOverloadTypes(TypeFlags, Ty, Ops));
10582  Value *Call = Builder.CreateCall(F, Ops);
10583 
10584  // Predicate results must be converted to svbool_t.
10585  if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10586  if (PredTy->getScalarType()->isIntegerTy(1))
10587  Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10588 
10589  return FormSVEBuiltinResult(Call);
10590  }
10591 
10592  switch (BuiltinID) {
10593  default:
10594  return nullptr;
10595 
10596  case SVE::BI__builtin_sve_svreinterpret_b: {
10597  auto SVCountTy =
10598  llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10599  Function *CastFromSVCountF =
10600  CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10601  return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10602  }
10603  case SVE::BI__builtin_sve_svreinterpret_c: {
10604  auto SVCountTy =
10605  llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10606  Function *CastToSVCountF =
10607  CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10608  return Builder.CreateCall(CastToSVCountF, Ops[0]);
10609  }
10610 
10611  case SVE::BI__builtin_sve_svpsel_lane_b8:
10612  case SVE::BI__builtin_sve_svpsel_lane_b16:
10613  case SVE::BI__builtin_sve_svpsel_lane_b32:
10614  case SVE::BI__builtin_sve_svpsel_lane_b64:
10615  case SVE::BI__builtin_sve_svpsel_lane_c8:
10616  case SVE::BI__builtin_sve_svpsel_lane_c16:
10617  case SVE::BI__builtin_sve_svpsel_lane_c32:
10618  case SVE::BI__builtin_sve_svpsel_lane_c64: {
10619  bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10620  assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10621  "aarch64.svcount")) &&
10622  "Unexpected TargetExtType");
10623  auto SVCountTy =
10624  llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10625  Function *CastFromSVCountF =
10626  CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10627  Function *CastToSVCountF =
10628  CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10629 
10630  auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10631  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10632  llvm::Value *Ops0 =
10633  IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10634  llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10635  llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10636  return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10637  }
10638  case SVE::BI__builtin_sve_svmov_b_z: {
10639  // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10640  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10641  llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10642  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10643  return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10644  }
10645 
10646  case SVE::BI__builtin_sve_svnot_b_z: {
10647  // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10648  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10649  llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10650  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10651  return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10652  }
10653 
10654  case SVE::BI__builtin_sve_svmovlb_u16:
10655  case SVE::BI__builtin_sve_svmovlb_u32:
10656  case SVE::BI__builtin_sve_svmovlb_u64:
10657  return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10658 
10659  case SVE::BI__builtin_sve_svmovlb_s16:
10660  case SVE::BI__builtin_sve_svmovlb_s32:
10661  case SVE::BI__builtin_sve_svmovlb_s64:
10662  return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10663 
10664  case SVE::BI__builtin_sve_svmovlt_u16:
10665  case SVE::BI__builtin_sve_svmovlt_u32:
10666  case SVE::BI__builtin_sve_svmovlt_u64:
10667  return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10668 
10669  case SVE::BI__builtin_sve_svmovlt_s16:
10670  case SVE::BI__builtin_sve_svmovlt_s32:
10671  case SVE::BI__builtin_sve_svmovlt_s64:
10672  return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10673 
10674  case SVE::BI__builtin_sve_svpmullt_u16:
10675  case SVE::BI__builtin_sve_svpmullt_u64:
10676  case SVE::BI__builtin_sve_svpmullt_n_u16:
10677  case SVE::BI__builtin_sve_svpmullt_n_u64:
10678  return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10679 
10680  case SVE::BI__builtin_sve_svpmullb_u16:
10681  case SVE::BI__builtin_sve_svpmullb_u64:
10682  case SVE::BI__builtin_sve_svpmullb_n_u16:
10683  case SVE::BI__builtin_sve_svpmullb_n_u64:
10684  return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10685 
10686  case SVE::BI__builtin_sve_svdup_n_b8:
10687  case SVE::BI__builtin_sve_svdup_n_b16:
10688  case SVE::BI__builtin_sve_svdup_n_b32:
10689  case SVE::BI__builtin_sve_svdup_n_b64: {
10690  Value *CmpNE =
10691  Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10692  llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10693  Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10694  return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10695  }
10696 
10697  case SVE::BI__builtin_sve_svdupq_n_b8:
10698  case SVE::BI__builtin_sve_svdupq_n_b16:
10699  case SVE::BI__builtin_sve_svdupq_n_b32:
10700  case SVE::BI__builtin_sve_svdupq_n_b64:
10701  case SVE::BI__builtin_sve_svdupq_n_u8:
10702  case SVE::BI__builtin_sve_svdupq_n_s8:
10703  case SVE::BI__builtin_sve_svdupq_n_u64:
10704  case SVE::BI__builtin_sve_svdupq_n_f64:
10705  case SVE::BI__builtin_sve_svdupq_n_s64:
10706  case SVE::BI__builtin_sve_svdupq_n_u16:
10707  case SVE::BI__builtin_sve_svdupq_n_f16:
10708  case SVE::BI__builtin_sve_svdupq_n_bf16:
10709  case SVE::BI__builtin_sve_svdupq_n_s16:
10710  case SVE::BI__builtin_sve_svdupq_n_u32:
10711  case SVE::BI__builtin_sve_svdupq_n_f32:
10712  case SVE::BI__builtin_sve_svdupq_n_s32: {
10713  // These builtins are implemented by building a fixed-length vector from the
10714  // scalar operands and splatting it across the scalable vector with dupq_lane.
10715  unsigned NumOpnds = Ops.size();
10716 
10717  bool IsBoolTy =
10718  cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10719 
10720  // For svdupq_n_b* the element type is an integer of width 128/numelts,
10721  // so that the compare can use the width that is natural for the expected
10722  // number of predicate lanes.
10723  llvm::Type *EltTy = Ops[0]->getType();
10724  if (IsBoolTy)
10725  EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10726 
10727  SmallVector<llvm::Value *, 16> VecOps;
10728  for (unsigned I = 0; I < NumOpnds; ++I)
10729  VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10730  Value *Vec = BuildVector(VecOps);
10731 
10732  llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10733  Value *InsertSubVec = Builder.CreateInsertVector(
10734  OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10735 
10736  Function *F =
10737  CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10738  Value *DupQLane =
10739  Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10740 
10741  if (!IsBoolTy)
10742  return DupQLane;
10743 
10744  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10745  Value *Pred = EmitSVEAllTruePred(TypeFlags);
10746 
10747  // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10748  F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10749  : Intrinsic::aarch64_sve_cmpne_wide,
10750  OverloadedTy);
10751  Value *Call = Builder.CreateCall(
10752  F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10753  return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10754  }
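 // Roughly, for a call such as svdupq_n_s32(a, b, c, d) the sequence above
 // produces IR along these lines (a sketch, with intrinsic names and types
 // abbreviated):
 //
 //   %vec = ... build <4 x i32> from a, b, c, d ...
 //   %ins = call <vscale x 4 x i32> @llvm.vector.insert(poison, %vec, i64 0)
 //   %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane(%ins, i64 0)
 //
 // For the svdupq_n_b* forms, an extra compare-not-equal against zero under
 // an all-true predicate converts the splatted integer vector into a
 // predicate.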
10755 
10756  case SVE::BI__builtin_sve_svpfalse_b:
10757  return ConstantInt::getFalse(Ty);
10758 
10759  case SVE::BI__builtin_sve_svpfalse_c: {
10760  auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10761  Function *CastToSVCountF =
10762  CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10763  return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10764  }
10765 
10766  case SVE::BI__builtin_sve_svlen_bf16:
10767  case SVE::BI__builtin_sve_svlen_f16:
10768  case SVE::BI__builtin_sve_svlen_f32:
10769  case SVE::BI__builtin_sve_svlen_f64:
10770  case SVE::BI__builtin_sve_svlen_s8:
10771  case SVE::BI__builtin_sve_svlen_s16:
10772  case SVE::BI__builtin_sve_svlen_s32:
10773  case SVE::BI__builtin_sve_svlen_s64:
10774  case SVE::BI__builtin_sve_svlen_u8:
10775  case SVE::BI__builtin_sve_svlen_u16:
10776  case SVE::BI__builtin_sve_svlen_u32:
10777  case SVE::BI__builtin_sve_svlen_u64: {
10778  SVETypeFlags TF(Builtin->TypeModifier);
10779  auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10780  auto *NumEls =
10781  llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10782 
10783  Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10784  return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10785  }
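 // A small worked example (a sketch, not from the original source): for
 // svlen_s32 the overloaded SVE type is <vscale x 4 x i32>, so NumEls is 4
 // and the builtin returns 4 * @llvm.vscale(), i.e. the number of 32-bit
 // lanes in one SVE register (16 on a 512-bit implementation).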
10786 
10787  case SVE::BI__builtin_sve_svtbl2_u8:
10788  case SVE::BI__builtin_sve_svtbl2_s8:
10789  case SVE::BI__builtin_sve_svtbl2_u16:
10790  case SVE::BI__builtin_sve_svtbl2_s16:
10791  case SVE::BI__builtin_sve_svtbl2_u32:
10792  case SVE::BI__builtin_sve_svtbl2_s32:
10793  case SVE::BI__builtin_sve_svtbl2_u64:
10794  case SVE::BI__builtin_sve_svtbl2_s64:
10795  case SVE::BI__builtin_sve_svtbl2_f16:
10796  case SVE::BI__builtin_sve_svtbl2_bf16:
10797  case SVE::BI__builtin_sve_svtbl2_f32:
10798  case SVE::BI__builtin_sve_svtbl2_f64: {
10799  SVETypeFlags TF(Builtin->TypeModifier);
10800  auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10801  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10802  return Builder.CreateCall(F, Ops);
10803  }
10804 
10805  case SVE::BI__builtin_sve_svset_neonq_s8:
10806  case SVE::BI__builtin_sve_svset_neonq_s16:
10807  case SVE::BI__builtin_sve_svset_neonq_s32:
10808  case SVE::BI__builtin_sve_svset_neonq_s64:
10809  case SVE::BI__builtin_sve_svset_neonq_u8:
10810  case SVE::BI__builtin_sve_svset_neonq_u16:
10811  case SVE::BI__builtin_sve_svset_neonq_u32:
10812  case SVE::BI__builtin_sve_svset_neonq_u64:
10813  case SVE::BI__builtin_sve_svset_neonq_f16:
10814  case SVE::BI__builtin_sve_svset_neonq_f32:
10815  case SVE::BI__builtin_sve_svset_neonq_f64:
10816  case SVE::BI__builtin_sve_svset_neonq_bf16: {
10817  return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10818  }
10819 
10820  case SVE::BI__builtin_sve_svget_neonq_s8:
10821  case SVE::BI__builtin_sve_svget_neonq_s16:
10822  case SVE::BI__builtin_sve_svget_neonq_s32:
10823  case SVE::BI__builtin_sve_svget_neonq_s64:
10824  case SVE::BI__builtin_sve_svget_neonq_u8:
10825  case SVE::BI__builtin_sve_svget_neonq_u16:
10826  case SVE::BI__builtin_sve_svget_neonq_u32:
10827  case SVE::BI__builtin_sve_svget_neonq_u64:
10828  case SVE::BI__builtin_sve_svget_neonq_f16:
10829  case SVE::BI__builtin_sve_svget_neonq_f32:
10830  case SVE::BI__builtin_sve_svget_neonq_f64:
10831  case SVE::BI__builtin_sve_svget_neonq_bf16: {
10832  return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10833  }
10834 
10835  case SVE::BI__builtin_sve_svdup_neonq_s8:
10836  case SVE::BI__builtin_sve_svdup_neonq_s16:
10837  case SVE::BI__builtin_sve_svdup_neonq_s32:
10838  case SVE::BI__builtin_sve_svdup_neonq_s64:
10839  case SVE::BI__builtin_sve_svdup_neonq_u8:
10840  case SVE::BI__builtin_sve_svdup_neonq_u16:
10841  case SVE::BI__builtin_sve_svdup_neonq_u32:
10842  case SVE::BI__builtin_sve_svdup_neonq_u64:
10843  case SVE::BI__builtin_sve_svdup_neonq_f16:
10844  case SVE::BI__builtin_sve_svdup_neonq_f32:
10845  case SVE::BI__builtin_sve_svdup_neonq_f64:
10846  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10847  Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10848  Builder.getInt64(0));
10849  return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10850  {Insert, Builder.getInt64(0)});
10851  }
10852  }
10853 
10854  // Should not happen.
10855  return nullptr;
10856 }
10857 
10858 static void swapCommutativeSMEOperands(unsigned BuiltinID,
10859  SmallVectorImpl<Value *> &Ops) {
10860  unsigned MultiVec;
10861  switch (BuiltinID) {
10862  default:
10863  return;
10864  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10865  MultiVec = 1;
10866  break;
10867  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10868  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10869  MultiVec = 2;
10870  break;
10871  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10872  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10873  MultiVec = 4;
10874  break;
10875  }
10876 
10877  if (MultiVec > 0)
10878  for (unsigned I = 0; I < MultiVec; ++I)
10879  std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10880 }
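 // Worked example (a sketch): for svsudot_za32_s8_vg1x2, MultiVec is 2 and
 // the processed operands are {slice, zn0, zn1, zm0, zm1}; the loop swaps
 // Ops[1]<->Ops[3] and Ops[2]<->Ops[4], exchanging the two multiplicand
 // groups so the signed-by-unsigned "su" builtin can be emitted with the
 // corresponding unsigned-by-signed "us" intrinsic.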
10881 
10882 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10883  const CallExpr *E) {
10884  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10885  AArch64SMEIntrinsicsProvenSorted);
10886 
10887  llvm::SmallVector<Value *, 4> Ops;
10888  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10889  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10890 
10891  if (TypeFlags.isLoad() || TypeFlags.isStore())
10892  return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10893  else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10894  return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10895  else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10896  BuiltinID == SME::BI__builtin_sme_svzero_za)
10897  return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10898  else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10899  BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10900  BuiltinID == SME::BI__builtin_sme_svldr_za ||
10901  BuiltinID == SME::BI__builtin_sme_svstr_za)
10902  return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10903 
10904  // Handle builtins which require their multi-vector operands to be swapped
10905  swapCommutativeSMEOperands(BuiltinID, Ops);
10906 
10907  // Should not happen!
10908  if (Builtin->LLVMIntrinsic == 0)
10909  return nullptr;
10910 
10911  // Predicates must match the main datatype.
10912  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10913  if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10914  if (PredTy->getElementType()->isIntegerTy(1))
10915  Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10916 
10917  Function *F =
10918  TypeFlags.isOverloadNone()
10919  ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10920  : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10921  Value *Call = Builder.CreateCall(F, Ops);
10922 
10923  return FormSVEBuiltinResult(Call);
10924 }
10925 
10926 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10927  const CallExpr *E,
10928  llvm::Triple::ArchType Arch) {
10929  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10930  BuiltinID <= clang::AArch64::LastSVEBuiltin)
10931  return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10932 
10933  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10934  BuiltinID <= clang::AArch64::LastSMEBuiltin)
10935  return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10936 
10937  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10938  return EmitAArch64CpuSupports(E);
10939 
10940  unsigned HintID = static_cast<unsigned>(-1);
10941  switch (BuiltinID) {
10942  default: break;
10943  case clang::AArch64::BI__builtin_arm_nop:
10944  HintID = 0;
10945  break;
10946  case clang::AArch64::BI__builtin_arm_yield:
10947  case clang::AArch64::BI__yield:
10948  HintID = 1;
10949  break;
10950  case clang::AArch64::BI__builtin_arm_wfe:
10951  case clang::AArch64::BI__wfe:
10952  HintID = 2;
10953  break;
10954  case clang::AArch64::BI__builtin_arm_wfi:
10955  case clang::AArch64::BI__wfi:
10956  HintID = 3;
10957  break;
10958  case clang::AArch64::BI__builtin_arm_sev:
10959  case clang::AArch64::BI__sev:
10960  HintID = 4;
10961  break;
10962  case clang::AArch64::BI__builtin_arm_sevl:
10963  case clang::AArch64::BI__sevl:
10964  HintID = 5;
10965  break;
10966  }
10967 
10968  if (HintID != static_cast<unsigned>(-1)) {
10969  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10970  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10971  }
10972 
10973  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10974  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10975  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10976  return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10977  }
10978 
10979  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10980  // Create call to __arm_sme_state and store the results to the two pointers.
10981  CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10982  llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10983  false),
10984  "__arm_sme_state"));
10985  auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10986  "aarch64_pstate_sm_compatible");
10987  CI->setAttributes(Attrs);
10988  CI->setCallingConv(
10989  llvm::CallingConv::
10990  AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10991  Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10992  EmitPointerWithAlignment(E->getArg(0)));
10993  return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10994  EmitPointerWithAlignment(E->getArg(1)));
10995  }
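 // The emitted IR is roughly (a sketch; the runtime routine's exact return
 // semantics are defined by the SME support-routines ABI):
 //
 //   %state = call {i64, i64} @__arm_sme_state()
 //   store i64 (extractvalue %state, 0), ptr %arg0
 //   store i64 (extractvalue %state, 1), ptr %arg1
 //
 // so the builtin's two out-parameters receive the caller's streaming/ZA
 // state and TPIDR2 information.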
10996 
10997  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10998  assert((getContext().getTypeSize(E->getType()) == 32) &&
10999  "rbit of unusual size!");
11000  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11001  return Builder.CreateCall(
11002  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11003  }
11004  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11005  assert((getContext().getTypeSize(E->getType()) == 64) &&
11006  "rbit of unusual size!");
11007  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11008  return Builder.CreateCall(
11009  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11010  }
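 // Both variants lower to the generic bit-reversal intrinsic; a sketch:
 //
 //   %r = call i32 @llvm.bitreverse.i32(i32 %x)   ; __builtin_arm_rbit
 //   %r = call i64 @llvm.bitreverse.i64(i64 %x)   ; __builtin_arm_rbit64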
11011 
11012  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11013  BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11014  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11015  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11016  Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11017  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11018  Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11019  return Res;
11020  }
11021 
11022  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11023  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11024  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11025  "cls");
11026  }
11027  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11028  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11029  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11030  "cls");
11031  }
11032 
11033  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11034  BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11035  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11036  llvm::Type *Ty = Arg->getType();
11037  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11038  Arg, "frint32z");
11039  }
11040 
11041  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11042  BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11043  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11044  llvm::Type *Ty = Arg->getType();
11045  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11046  Arg, "frint64z");
11047  }
11048 
11049  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11050  BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11051  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11052  llvm::Type *Ty = Arg->getType();
11053  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11054  Arg, "frint32x");
11055  }
11056 
11057  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11058  BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11059  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11060  llvm::Type *Ty = Arg->getType();
11061  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11062  Arg, "frint64x");
11063  }
11064 
11065  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11066  assert((getContext().getTypeSize(E->getType()) == 32) &&
11067  "__jcvt of unusual size!");
11068  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11069  return Builder.CreateCall(
11070  CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11071  }
11072 
11073  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11074  BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11075  BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11076  BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11077  llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11078  llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11079 
11080  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11081  // Load from the address via an LLVM intrinsic, receiving a
11082  // tuple of 8 i64 words, and store each one to ValPtr.
11083  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11084  llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11085  llvm::Value *ToRet;
11086  for (size_t i = 0; i < 8; i++) {
11087  llvm::Value *ValOffsetPtr =
11088  Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11089  Address Addr =
11090  Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11091  ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11092  }
11093  return ToRet;
11094  } else {
11095  // Load 8 i64 words from ValPtr, and store them to the address
11096  // via an LLVM intrinsic.
11098  Args.push_back(MemAddr);
11099  for (size_t i = 0; i < 8; i++) {
11100  llvm::Value *ValOffsetPtr =
11101  Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11102  Address Addr =
11103  Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11104  Args.push_back(Builder.CreateLoad(Addr));
11105  }
11106 
11107  auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11108  ? Intrinsic::aarch64_st64b
11109  : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11110  ? Intrinsic::aarch64_st64bv
11111  : Intrinsic::aarch64_st64bv0);
11112  Function *F = CGM.getIntrinsic(Intr);
11113  return Builder.CreateCall(F, Args);
11114  }
11115  }
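 // Sketch: __builtin_arm_ld64b lowers to a single call returning eight i64s,
 // which are then stored out one word at a time:
 //
 //   %tuple = call {i64, i64, i64, i64, i64, i64, i64, i64}
 //                @llvm.aarch64.ld64b(ptr %mem)
 //   ; followed by eight extractvalue+store pairs into ValPtr[0..7]
 //
 // The st64b/st64bv/st64bv0 path does the reverse: eight loads feed a single
 // intrinsic call taking the address plus eight i64 arguments.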
11116 
11117  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11118  BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11119 
11120  auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11121  ? Intrinsic::aarch64_rndr
11122  : Intrinsic::aarch64_rndrrs);
11123  Function *F = CGM.getIntrinsic(Intr);
11124  llvm::Value *Val = Builder.CreateCall(F);
11125  Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11126  Value *Status = Builder.CreateExtractValue(Val, 1);
11127 
11128  Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11129  Builder.CreateStore(RandomValue, MemAddress);
11130  Status = Builder.CreateZExt(Status, Int32Ty);
11131  return Status;
11132  }
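 // Sketch of the resulting IR: both intrinsics return an {i64, i1} pair
 // whose i1 reports whether a valid random number was produced:
 //
 //   %p      = call {i64, i1} @llvm.aarch64.rndr()
 //   %value  = extractvalue {i64, i1} %p, 0   ; stored through arg 0
 //   %status = extractvalue {i64, i1} %p, 1   ; zext'd to i32 and returned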
11133 
11134  if (BuiltinID == clang::AArch64::BI__clear_cache) {
11135  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11136  const FunctionDecl *FD = E->getDirectCallee();
11137  Value *Ops[2];
11138  for (unsigned i = 0; i < 2; i++)
11139  Ops[i] = EmitScalarExpr(E->getArg(i));
11140  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11141  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11142  StringRef Name = FD->getName();
11143  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11144  }
11145 
11146  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11147  BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11148  getContext().getTypeSize(E->getType()) == 128) {
11149  Function *F =
11150  CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11151  ? Intrinsic::aarch64_ldaxp
11152  : Intrinsic::aarch64_ldxp);
11153 
11154  Value *LdPtr = EmitScalarExpr(E->getArg(0));
11155  Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11156 
11157  Value *Val0 = Builder.CreateExtractValue(Val, 1);
11158  Value *Val1 = Builder.CreateExtractValue(Val, 0);
11159  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11160  Val0 = Builder.CreateZExt(Val0, Int128Ty);
11161  Val1 = Builder.CreateZExt(Val1, Int128Ty);
11162 
11163  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11164  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11165  Val = Builder.CreateOr(Val, Val1);
11166  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11167  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11168  BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11169  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11170 
11171  QualType Ty = E->getType();
11172  llvm::Type *RealResTy = ConvertType(Ty);
11173  llvm::Type *IntTy =
11174  llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11175  llvm::Type *PtrTy = IntTy->getPointerTo();
11176  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
11177 
11178  Function *F =
11179  CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11180  ? Intrinsic::aarch64_ldaxr
11181  : Intrinsic::aarch64_ldxr,
11182  UnqualPtrTy);
11183  CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11184  Val->addParamAttr(
11185  0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11186 
11187  if (RealResTy->isPointerTy())
11188  return Builder.CreateIntToPtr(Val, RealResTy);
11189 
11190  llvm::Type *IntResTy = llvm::IntegerType::get(
11191  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11192  return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11193  RealResTy);
11194  }
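 // For the 128-bit case the exclusive load yields an {i64, i64} pair that is
 // reassembled into an i128; roughly (a sketch, little-endian lane order):
 //
 //   %pair = call {i64, i64} @llvm.aarch64.ldxp(ptr %p)
 //   %hi   = zext i64 (extractvalue %pair, 1) to i128
 //   %lo   = zext i64 (extractvalue %pair, 0) to i128
 //   %res  = or i128 (shl nuw i128 %hi, 64), %lo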
11195 
11196  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11197  BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11198  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11199  Function *F =
11200  CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11201  ? Intrinsic::aarch64_stlxp
11202  : Intrinsic::aarch64_stxp);
11203  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11204 
11205  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11206  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11207 
11208  Tmp = Tmp.withElementType(STy);
11209  llvm::Value *Val = Builder.CreateLoad(Tmp);
11210 
11211  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11212  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11213  Value *StPtr = EmitScalarExpr(E->getArg(1));
11214  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11215  }
11216 
11217  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11218  BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11219  Value *StoreVal = EmitScalarExpr(E->getArg(0));
11220  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11221 
11222  QualType Ty = E->getArg(0)->getType();
11223  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
11224  getContext().getTypeSize(Ty));
11225  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
11226 
11227  if (StoreVal->getType()->isPointerTy())
11228  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11229  else {
11230  llvm::Type *IntTy = llvm::IntegerType::get(
11231  getLLVMContext(),
11232  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11233  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11234  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11235  }
11236 
11237  Function *F =
11238  CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11239  ? Intrinsic::aarch64_stlxr
11240  : Intrinsic::aarch64_stxr,
11241  StoreAddr->getType());
11242  CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11243  CI->addParamAttr(
11244  1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11245  return CI;
11246  }
11247 
11248  if (BuiltinID == clang::AArch64::BI__getReg) {
11249  Expr::EvalResult Result;
11250  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11251  llvm_unreachable("Sema will ensure that the parameter is constant");
11252 
11253  llvm::APSInt Value = Result.Val.getInt();
11254  LLVMContext &Context = CGM.getLLVMContext();
11255  std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11256 
11257  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11258  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11259  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11260 
11261  llvm::Function *F =
11262  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11263  return Builder.CreateCall(F, Metadata);
11264  }
11265 
11266  if (BuiltinID == clang::AArch64::BI__break) {
11267  Expr::EvalResult Result;
11268  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11269  llvm_unreachable("Sema will ensure that the parameter is constant");
11270 
11271  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11272  return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11273  }
11274 
11275  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11276  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11277  return Builder.CreateCall(F);
11278  }
11279 
11280  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11281  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11282  llvm::SyncScope::SingleThread);
11283 
11284  // CRC32
11285  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11286  switch (BuiltinID) {
11287  case clang::AArch64::BI__builtin_arm_crc32b:
11288  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11289  case clang::AArch64::BI__builtin_arm_crc32cb:
11290  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11291  case clang::AArch64::BI__builtin_arm_crc32h:
11292  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11293  case clang::AArch64::BI__builtin_arm_crc32ch:
11294  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11295  case clang::AArch64::BI__builtin_arm_crc32w:
11296  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11297  case clang::AArch64::BI__builtin_arm_crc32cw:
11298  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11299  case clang::AArch64::BI__builtin_arm_crc32d:
11300  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11301  case clang::AArch64::BI__builtin_arm_crc32cd:
11302  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11303  }
11304 
11305  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11306  Value *Arg0 = EmitScalarExpr(E->getArg(0));
11307  Value *Arg1 = EmitScalarExpr(E->getArg(1));
11308  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11309 
11310  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11311  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11312 
11313  return Builder.CreateCall(F, {Arg0, Arg1});
11314  }
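 // Example lowering (a sketch): __builtin_arm_crc32b(acc, data) becomes
 //
 //   %d = zext i8 %data to i32
 //   %r = call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %d)
 //
 // The zext-or-bitcast above widens the data operand to whatever parameter
 // type the chosen CRC intrinsic expects.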
11315 
11316  // Memory Operations (MOPS)
11317  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11318  Value *Dst = EmitScalarExpr(E->getArg(0));
11319  Value *Val = EmitScalarExpr(E->getArg(1));
11320  Value *Size = EmitScalarExpr(E->getArg(2));
11321  Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11322  Val = Builder.CreateTrunc(Val, Int8Ty);
11323  Size = Builder.CreateIntCast(Size, Int64Ty, false);
11324  return Builder.CreateCall(
11325  CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11326  }
11327 
11328  // Memory Tagging Extensions (MTE) Intrinsics
11329  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11330  switch (BuiltinID) {
11331  case clang::AArch64::BI__builtin_arm_irg:
11332  MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11333  case clang::AArch64::BI__builtin_arm_addg:
11334  MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11335  case clang::AArch64::BI__builtin_arm_gmi:
11336  MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11337  case clang::AArch64::BI__builtin_arm_ldg:
11338  MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11339  case clang::AArch64::BI__builtin_arm_stg:
11340  MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11341  case clang::AArch64::BI__builtin_arm_subp:
11342  MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11343  }
11344 
11345  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11346  llvm::Type *T = ConvertType(E->getType());
11347 
11348  if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11349  Value *Pointer = EmitScalarExpr(E->getArg(0));
11350  Value *Mask = EmitScalarExpr(E->getArg(1));
11351 
11352  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11353  Mask = Builder.CreateZExt(Mask, Int64Ty);
11354  Value *RV = Builder.CreateCall(
11355  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11356  return Builder.CreatePointerCast(RV, T);
11357  }
11358  if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11359  Value *Pointer = EmitScalarExpr(E->getArg(0));
11360  Value *TagOffset = EmitScalarExpr(E->getArg(1));
11361 
11362  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11363  TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11364  Value *RV = Builder.CreateCall(
11365  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11366  return Builder.CreatePointerCast(RV, T);
11367  }
11368  if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11369  Value *Pointer = EmitScalarExpr(E->getArg(0));
11370  Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11371 
11372  ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11373  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11374  return Builder.CreateCall(
11375  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11376  }
11377  // Although it is possible to supply a different return
11378  // address (first arg) to this intrinsic, for now we set the
11379  // return address to the same value as the input address.
11380  if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11381  Value *TagAddress = EmitScalarExpr(E->getArg(0));
11382  TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11383  Value *RV = Builder.CreateCall(
11384  CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11385  return Builder.CreatePointerCast(RV, T);
11386  }
11387  // Although it is possible to supply a different tag (to set)
11388  // to this intrinsic (as the first arg), for now we supply the tag
11389  // that is in the input address argument (the common use case).
11390  if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11391  Value *TagAddress = EmitScalarExpr(E->getArg(0));
11392  TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11393  return Builder.CreateCall(
11394  CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11395  }
11396  if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11397  Value *PointerA = EmitScalarExpr(E->getArg(0));
11398  Value *PointerB = EmitScalarExpr(E->getArg(1));
11399  PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11400  PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11401  return Builder.CreateCall(
11402  CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11403  }
11404  }
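 // Usage sketch for the IRG path (hypothetical values): a call such as
 // __builtin_arm_irg(p, mask) becomes
 //
 //   %r = call ptr @llvm.aarch64.irg(ptr %p, i64 %mask)
 //
 // returning a copy of %p with a freshly chosen allocation tag; the other
 // MTE builtins map onto aarch64.addg/gmi/ldg/stg/subp in the same direct
 // way, with the pointer casts and zero-extensions shown above.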
11405 
11406  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11407  BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11408  BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11409  BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11410  BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11411  BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11412  BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11413  BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11414 
11415  SpecialRegisterAccessKind AccessKind = Write;
11416  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11417  BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11418  BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11419  BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11420  AccessKind = VolatileRead;
11421 
11422  bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11423  BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11424 
11425  bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11426  BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11427 
11428  bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11429  BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11430 
11431  llvm::Type *ValueType;
11432  llvm::Type *RegisterType = Int64Ty;
11433  if (Is32Bit) {
11434  ValueType = Int32Ty;
11435  } else if (Is128Bit) {
11436  llvm::Type *Int128Ty =
11437  llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11438  ValueType = Int128Ty;
11439  RegisterType = Int128Ty;
11440  } else if (IsPointerBuiltin) {
11441  ValueType = VoidPtrTy;
11442  } else {
11443  ValueType = Int64Ty;
11444  }
11445 
11446  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11447  AccessKind);
11448  }
11449 
11450  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11451  BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11452  LLVMContext &Context = CGM.getLLVMContext();
11453 
11454  unsigned SysReg =
11455  E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11456 
11457  std::string SysRegStr;
11458  llvm::raw_string_ostream(SysRegStr) <<
11459  ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11460  ((SysReg >> 11) & 7) << ":" <<
11461  ((SysReg >> 7) & 15) << ":" <<
11462  ((SysReg >> 3) & 15) << ":" <<
11463  ( SysReg & 7);
11464 
11465  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11466  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11467  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11468 
11469  llvm::Type *RegisterType = Int64Ty;
11470  llvm::Type *Types[] = { RegisterType };
11471 
11472  if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11473  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11474 
11475  return Builder.CreateCall(F, Metadata);
11476  }
11477 
11478  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11479  llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11480 
11481  return Builder.CreateCall(F, { Metadata, ArgValue });
11482  }
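 // Worked example of the encoding above (a sketch): the Windows-style
 // register constant packs op0's low bit, op1, CRn, CRm and op2 into 15
 // bits, and the string rebuilds the "op0:op1:CRn:CRm:op2" form. For
 // TPIDR_EL0 (S3_3_C13_C0_2) the constant is
 //
 //   (1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2   // 0x5E82
 //
 // and the metadata string becomes "3:3:13:0:2"; op0 is always 0b10 or
 // 0b11 for these registers, so only its low bit is stored.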
11483 
11484  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11485  llvm::Function *F =
11486  CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11487  return Builder.CreateCall(F);
11488  }
11489 
11490  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11491  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11492  return Builder.CreateCall(F);
11493  }
11494 
11495  if (BuiltinID == clang::AArch64::BI__mulh ||
11496  BuiltinID == clang::AArch64::BI__umulh) {
11497  llvm::Type *ResType = ConvertType(E->getType());
11498  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11499 
11500  bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11501  Value *LHS =
11502  Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11503  Value *RHS =
11504  Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11505 
11506  Value *MulResult, *HigherBits;
11507  if (IsSigned) {
11508  MulResult = Builder.CreateNSWMul(LHS, RHS);
11509  HigherBits = Builder.CreateAShr(MulResult, 64);
11510  } else {
11511  MulResult = Builder.CreateNUWMul(LHS, RHS);
11512  HigherBits = Builder.CreateLShr(MulResult, 64);
11513  }
11514  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11515 
11516  return HigherBits;
11517  }
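 // Equivalent C for the __mulh path (a sketch using the compiler's native
 // 128-bit integers, mirroring the widen-multiply-shift sequence above):
 //
 //   static inline int64_t mulh(int64_t a, int64_t b) {
 //     return (int64_t)(((__int128)a * (__int128)b) >> 64);
 //   }
 //
 // __umulh is the same with unsigned __int128 and a logical shift.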
11518 
11519  if (BuiltinID == AArch64::BI__writex18byte ||
11520  BuiltinID == AArch64::BI__writex18word ||
11521  BuiltinID == AArch64::BI__writex18dword ||
11522  BuiltinID == AArch64::BI__writex18qword) {
11523  // Read x18 as i8*
11524  LLVMContext &Context = CGM.getLLVMContext();
11525  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11526  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11527  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11528  llvm::Function *F =
11529  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11530  llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11531  X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11532 
11533  // Store val at x18 + offset
11534  Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11535  Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11536  Value *Val = EmitScalarExpr(E->getArg(1));
11537  StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11538  return Store;
11539  }
11540 
11541  if (BuiltinID == AArch64::BI__readx18byte ||
11542  BuiltinID == AArch64::BI__readx18word ||
11543  BuiltinID == AArch64::BI__readx18dword ||
11544  BuiltinID == AArch64::BI__readx18qword) {
11545  llvm::Type *IntTy = ConvertType(E->getType());
11546 
11547  // Read x18 as i8*
11548  LLVMContext &Context = CGM.getLLVMContext();
11549  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11550  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11551  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11552  llvm::Function *F =
11553  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11554  llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11555  X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11556 
11557  // Load x18 + offset
11558  Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11559  Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11560  LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11561  return Load;
11562  }
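 // Sketch of both x18 paths: the register is read via read_register
 // metadata and treated as a byte pointer; e.g. __readx18byte(off) is
 // roughly
 //
 //   %x18 = call i64 @llvm.read_register.i64(metadata !"x18")
 //   %p   = getelementptr i8, ptr inttoptr(i64 %x18), i64 zext(%off)
 //   %v   = load i8, ptr %p, align 1
 //
 // The write variants store through the same address computation instead.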
11563 
11564  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11565  BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11566  BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11567  BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11568  Value *Arg = EmitScalarExpr(E->getArg(0));
11569  llvm::Type *RetTy = ConvertType(E->getType());
11570  return Builder.CreateBitCast(Arg, RetTy);
11571  }
11572 
11573  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11574  BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11575  BuiltinID == AArch64::BI_CountLeadingZeros ||
11576  BuiltinID == AArch64::BI_CountLeadingZeros64) {
11577  Value *Arg = EmitScalarExpr(E->getArg(0));
11578  llvm::Type *ArgType = Arg->getType();
11579 
11580  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11581  BuiltinID == AArch64::BI_CountLeadingOnes64)
11582  Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11583 
11584  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11585  Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11586 
11587  if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11588  BuiltinID == AArch64::BI_CountLeadingZeros64)
11589  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11590  return Result;
11591  }
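 // Worked example (a sketch): _CountLeadingOnes(0xFFF00000) first inverts
 // the argument to 0x000FFFFF and then counts leading zeros, yielding 12;
 // the 64-bit variants truncate the ctlz result back to the builtin's i32
 // return type.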
11592 
11593  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11594  BuiltinID == AArch64::BI_CountLeadingSigns64) {
11595  Value *Arg = EmitScalarExpr(E->getArg(0));
11596 
11597  Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11598  ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11599  : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11600 
11601  Value *Result = Builder.CreateCall(F, Arg, "cls");
11602  if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11603  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11604  return Result;
11605  }
11606 
11607  if (BuiltinID == AArch64::BI_CountOneBits ||
11608  BuiltinID == AArch64::BI_CountOneBits64) {
11609  Value *ArgValue = EmitScalarExpr(E->getArg(0));
11610  llvm::Type *ArgType = ArgValue->getType();
11611  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11612 
11613  Value *Result = Builder.CreateCall(F, ArgValue);
11614  if (BuiltinID == AArch64::BI_CountOneBits64)
11615  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11616  return Result;
11617  }
11618 
11619  if (BuiltinID == AArch64::BI__prefetch) {
11620  Value *Address = EmitScalarExpr(E->getArg(0));
11621  Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11622  Value *Locality = ConstantInt::get(Int32Ty, 3);
11623  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11624  Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11625  return Builder.CreateCall(F, {Address, RW, Locality, Data});
11626  }
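 // Sketch of the emitted call: a plain data-read prefetch with maximal
 // temporal locality, i.e. roughly
 //
 //   call void @llvm.prefetch.p0(ptr %addr, i32 0, i32 3, i32 1)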
11627 
11628  // Handle MSVC intrinsics before argument evaluation to prevent double
11629  // evaluation.
11630  if (std::optional<MSVCIntrin> MsvcIntId =
11631  translateAarch64ToMsvcIntrin(BuiltinID))
11632  return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11633 
11634  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
11635  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11636  return P.first == BuiltinID;
11637  });
11638  if (It != end(NEONEquivalentIntrinsicMap))
11639  BuiltinID = It->second;
11640 
11641  // Find out if any arguments are required to be integer constant
11642  // expressions.
11643  unsigned ICEArguments = 0;
11644  ASTContext::GetBuiltinTypeError Error;
11645  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11646  assert(Error == ASTContext::GE_None && "Should not codegen an error");
11647 
11648  llvm::SmallVector<Value *, 4> Ops;
11649  Address PtrOp0 = Address::invalid();
11650  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11651  if (i == 0) {
11652  switch (BuiltinID) {
11653  case NEON::BI__builtin_neon_vld1_v:
11654  case NEON::BI__builtin_neon_vld1q_v:
11655  case NEON::BI__builtin_neon_vld1_dup_v:
11656  case NEON::BI__builtin_neon_vld1q_dup_v:
11657  case NEON::BI__builtin_neon_vld1_lane_v:
11658  case NEON::BI__builtin_neon_vld1q_lane_v:
11659  case NEON::BI__builtin_neon_vst1_v:
11660  case NEON::BI__builtin_neon_vst1q_v:
11661  case NEON::BI__builtin_neon_vst1_lane_v:
11662  case NEON::BI__builtin_neon_vst1q_lane_v:
11663  case NEON::BI__builtin_neon_vldap1_lane_s64:
11664  case NEON::BI__builtin_neon_vldap1q_lane_s64:
11665  case NEON::BI__builtin_neon_vstl1_lane_s64:
11666  case NEON::BI__builtin_neon_vstl1q_lane_s64:
11667  // Get the alignment for the argument in addition to the value;
11668  // we'll use it later.
11669  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11670  Ops.push_back(PtrOp0.emitRawPointer(*this));
11671  continue;
11672  }
11673  }
11674  Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11675  }
11676 
11677  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11678  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11679  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11680 
11681  if (Builtin) {
11682  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11683  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11684  assert(Result && "SISD intrinsic should have been handled");
11685  return Result;
11686  }
11687 
11688  const Expr *Arg = E->getArg(E->getNumArgs()-1);
11689  NeonTypeFlags Type(0);
11690  if (std::optional<llvm::APSInt> Result =
11691  Arg->getIntegerConstantExpr(getContext()))
11692  // Determine the type of this overloaded NEON intrinsic.
11693  Type = NeonTypeFlags(Result->getZExtValue());
11694 
11695  bool usgn = Type.isUnsigned();
11696  bool quad = Type.isQuad();
11697 
11698  // Handle non-overloaded intrinsics first.
11699  switch (BuiltinID) {
11700  default: break;
11701  case NEON::BI__builtin_neon_vabsh_f16:
11702  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11703  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11704  case NEON::BI__builtin_neon_vaddq_p128: {
11705  llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11706  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11707  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11708  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11709  Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11710  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11711  return Builder.CreateBitCast(Ops[0], Int128Ty);
11712  }
11713  case NEON::BI__builtin_neon_vldrq_p128: {
11714  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11715  Value *Ptr = EmitScalarExpr(E->getArg(0));
11716  return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11717  CharUnits::fromQuantity(16));
11718  }
11719  case NEON::BI__builtin_neon_vstrq_p128: {
11720  Value *Ptr = Ops[0];
11721  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11722  }
11723  case NEON::BI__builtin_neon_vcvts_f32_u32:
11724  case NEON::BI__builtin_neon_vcvtd_f64_u64:
11725  usgn = true;
11726  [[fallthrough]];
11727  case NEON::BI__builtin_neon_vcvts_f32_s32:
11728  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11729  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11730  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11731  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11732  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11733  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11734  if (usgn)
11735  return Builder.CreateUIToFP(Ops[0], FTy);
11736  return Builder.CreateSIToFP(Ops[0], FTy);
11737  }
11738  case NEON::BI__builtin_neon_vcvth_f16_u16:
11739  case NEON::BI__builtin_neon_vcvth_f16_u32:
11740  case NEON::BI__builtin_neon_vcvth_f16_u64:
11741  usgn = true;
11742  [[fallthrough]];
11743  case NEON::BI__builtin_neon_vcvth_f16_s16:
11744  case NEON::BI__builtin_neon_vcvth_f16_s32:
11745  case NEON::BI__builtin_neon_vcvth_f16_s64: {
11746  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11747  llvm::Type *FTy = HalfTy;
11748  llvm::Type *InTy;
11749  if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11750  InTy = Int64Ty;
11751  else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11752  InTy = Int32Ty;
11753  else
11754  InTy = Int16Ty;
11755  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11756  if (usgn)
11757  return Builder.CreateUIToFP(Ops[0], FTy);
11758  return Builder.CreateSIToFP(Ops[0], FTy);
11759  }
11760  case NEON::BI__builtin_neon_vcvtah_u16_f16:
11761  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11762  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11763  case NEON::BI__builtin_neon_vcvtph_u16_f16:
11764  case NEON::BI__builtin_neon_vcvth_u16_f16:
11765  case NEON::BI__builtin_neon_vcvtah_s16_f16:
11766  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11767  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11768  case NEON::BI__builtin_neon_vcvtph_s16_f16:
11769  case NEON::BI__builtin_neon_vcvth_s16_f16: {
11770  unsigned Int;
11771  llvm::Type* InTy = Int32Ty;
11772  llvm::Type* FTy = HalfTy;
11773  llvm::Type *Tys[2] = {InTy, FTy};
11774  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11775  switch (BuiltinID) {
11776  default: llvm_unreachable("missing builtin ID in switch!");
11777  case NEON::BI__builtin_neon_vcvtah_u16_f16:
11778  Int = Intrinsic::aarch64_neon_fcvtau; break;
11779  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11780  Int = Intrinsic::aarch64_neon_fcvtmu; break;
11781  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11782  Int = Intrinsic::aarch64_neon_fcvtnu; break;
11783  case NEON::BI__builtin_neon_vcvtph_u16_f16:
11784  Int = Intrinsic::aarch64_neon_fcvtpu; break;
11785  case NEON::BI__builtin_neon_vcvth_u16_f16:
11786  Int = Intrinsic::aarch64_neon_fcvtzu; break;
11787  case NEON::BI__builtin_neon_vcvtah_s16_f16:
11788  Int = Intrinsic::aarch64_neon_fcvtas; break;
11789  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11790  Int = Intrinsic::aarch64_neon_fcvtms; break;
11791  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11792  Int = Intrinsic::aarch64_neon_fcvtns; break;
11793  case NEON::BI__builtin_neon_vcvtph_s16_f16:
11794  Int = Intrinsic::aarch64_neon_fcvtps; break;
11795  case NEON::BI__builtin_neon_vcvth_s16_f16:
11796  Int = Intrinsic::aarch64_neon_fcvtzs; break;
11797  }
11798  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11799  return Builder.CreateTrunc(Ops[0], Int16Ty);
11800  }
11801  case NEON::BI__builtin_neon_vcaleh_f16:
11802  case NEON::BI__builtin_neon_vcalth_f16:
11803  case NEON::BI__builtin_neon_vcageh_f16:
11804  case NEON::BI__builtin_neon_vcagth_f16: {
11805  unsigned Int;
11806  llvm::Type* InTy = Int32Ty;
11807  llvm::Type* FTy = HalfTy;
11808  llvm::Type *Tys[2] = {InTy, FTy};
11809  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11810  switch (BuiltinID) {
11811  default: llvm_unreachable("missing builtin ID in switch!");
11812  case NEON::BI__builtin_neon_vcageh_f16:
11813  Int = Intrinsic::aarch64_neon_facge; break;
11814  case NEON::BI__builtin_neon_vcagth_f16:
11815  Int = Intrinsic::aarch64_neon_facgt; break;
11816  case NEON::BI__builtin_neon_vcaleh_f16:
11817  Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11818  case NEON::BI__builtin_neon_vcalth_f16:
11819  Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11820  }
11821  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11822  return Builder.CreateTrunc(Ops[0], Int16Ty);
11823  }
11824  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11825  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11826  unsigned Int;
11827  llvm::Type* InTy = Int32Ty;
11828  llvm::Type* FTy = HalfTy;
11829  llvm::Type *Tys[2] = {InTy, FTy};
11830  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11831  switch (BuiltinID) {
11832  default: llvm_unreachable("missing builtin ID in switch!");
11833  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11834  Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11835  case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11836  Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11837  }
11838  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11839  return Builder.CreateTrunc(Ops[0], Int16Ty);
11840  }
11841  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11842  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11843  unsigned Int;
11844  llvm::Type* FTy = HalfTy;
11845  llvm::Type* InTy = Int32Ty;
11846  llvm::Type *Tys[2] = {FTy, InTy};
11847  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11848  switch (BuiltinID) {
11849  default: llvm_unreachable("missing builtin ID in switch!");
11850  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11851  Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11852  Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11853  break;
11854  case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11855  Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11856  Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11857  break;
11858  }
11859  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11860  }
11861  case NEON::BI__builtin_neon_vpaddd_s64: {
11862  auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11863  Value *Vec = EmitScalarExpr(E->getArg(0));
11864  // The vector is v2i64, so make sure it's bitcast to that.
11865  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11866  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11867  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11868  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11869  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11870  // Pairwise addition of a v2i64 into a scalar i64.
11871  return Builder.CreateAdd(Op0, Op1, "vpaddd");
11872  }
11873  case NEON::BI__builtin_neon_vpaddd_f64: {
11874  auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11875  Value *Vec = EmitScalarExpr(E->getArg(0));
11876  // The vector is v2f64, so make sure it's bitcast to that.
11877  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11878  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11879  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11880  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11881  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11882  // Pairwise addition of a v2f64 into a scalar f64.
11883  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11884  }
11885  case NEON::BI__builtin_neon_vpadds_f32: {
11886  auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11887  Value *Vec = EmitScalarExpr(E->getArg(0));
11888  // The vector is v2f32, so make sure it's bitcast to that.
11889  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11890  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11891  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11892  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11893  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11894  // Pairwise addition of a v2f32 into a scalar f32.
11895  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11896  }
11897  case NEON::BI__builtin_neon_vceqzd_s64:
11898  case NEON::BI__builtin_neon_vceqzd_f64:
11899  case NEON::BI__builtin_neon_vceqzs_f32:
11900  case NEON::BI__builtin_neon_vceqzh_f16:
11901  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11902  return EmitAArch64CompareBuiltinExpr(
11903  Ops[0], ConvertType(E->getCallReturnType(getContext())),
11904  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11905  case NEON::BI__builtin_neon_vcgezd_s64:
11906  case NEON::BI__builtin_neon_vcgezd_f64:
11907  case NEON::BI__builtin_neon_vcgezs_f32:
11908  case NEON::BI__builtin_neon_vcgezh_f16:
11909  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11910  return EmitAArch64CompareBuiltinExpr(
11911  Ops[0], ConvertType(E->getCallReturnType(getContext())),
11912  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11913  case NEON::BI__builtin_neon_vclezd_s64:
11914  case NEON::BI__builtin_neon_vclezd_f64:
11915  case NEON::BI__builtin_neon_vclezs_f32:
11916  case NEON::BI__builtin_neon_vclezh_f16:
11917  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11918  return EmitAArch64CompareBuiltinExpr(
11919  Ops[0], ConvertType(E->getCallReturnType(getContext())),
11920  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11921  case NEON::BI__builtin_neon_vcgtzd_s64:
11922  case NEON::BI__builtin_neon_vcgtzd_f64:
11923  case NEON::BI__builtin_neon_vcgtzs_f32:
11924  case NEON::BI__builtin_neon_vcgtzh_f16:
11925  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11926  return EmitAArch64CompareBuiltinExpr(
11927  Ops[0], ConvertType(E->getCallReturnType(getContext())),
11928  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11929  case NEON::BI__builtin_neon_vcltzd_s64:
11930  case NEON::BI__builtin_neon_vcltzd_f64:
11931  case NEON::BI__builtin_neon_vcltzs_f32:
11932  case NEON::BI__builtin_neon_vcltzh_f16:
11933  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11934  return EmitAArch64CompareBuiltinExpr(
11935  Ops[0], ConvertType(E->getCallReturnType(getContext())),
11936  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11937 
11938  case NEON::BI__builtin_neon_vceqzd_u64: {
11939  Ops.push_back(EmitScalarExpr(E->getArg(0)));
11940  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11941  Ops[0] =
11942  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11943  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11944  }
11945  case NEON::BI__builtin_neon_vceqd_f64:
11946  case NEON::BI__builtin_neon_vcled_f64:
11947  case NEON::BI__builtin_neon_vcltd_f64:
11948  case NEON::BI__builtin_neon_vcged_f64:
11949  case NEON::BI__builtin_neon_vcgtd_f64: {
11950  llvm::CmpInst::Predicate P;
11951  switch (BuiltinID) {
11952  default: llvm_unreachable("missing builtin ID in switch!");
11953  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11954  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11955  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11956  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11957  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11958  }
11959  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11960  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11961  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11962  if (P == llvm::FCmpInst::FCMP_OEQ)
11963  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11964  else
11965  Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11966  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11967  }
11968  case NEON::BI__builtin_neon_vceqs_f32:
11969  case NEON::BI__builtin_neon_vcles_f32:
11970  case NEON::BI__builtin_neon_vclts_f32:
11971  case NEON::BI__builtin_neon_vcges_f32:
11972  case NEON::BI__builtin_neon_vcgts_f32: {
11973  llvm::CmpInst::Predicate P;
11974  switch (BuiltinID) {
11975  default: llvm_unreachable("missing builtin ID in switch!");
11976  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11977  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11978  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11979  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11980  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11981  }
11982  Ops.push_back(EmitScalarExpr(E->getArg(1)));
11983  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11984  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11985  if (P == llvm::FCmpInst::FCMP_OEQ)
11986  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11987  else
11988  Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11989  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11990  }
11991  case NEON::BI__builtin_neon_vceqh_f16:
11992  case NEON::BI__builtin_neon_vcleh_f16:
11993  case NEON::BI__builtin_neon_vclth_f16:
11994  case NEON::BI__builtin_neon_vcgeh_f16:
11995  case NEON::BI__builtin_neon_vcgth_f16: {
11996  llvm::CmpInst::Predicate P;
11997  switch (BuiltinID) {
11998  default: llvm_unreachable("missing builtin ID in switch!");
11999  case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12000  case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12001  case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12002  case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12003  case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12004  }
12005  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12006  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12007  Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12008  if (P == llvm::FCmpInst::FCMP_OEQ)
12009  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12010  else
12011  Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12012  return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12013  }
12014  case NEON::BI__builtin_neon_vceqd_s64:
12015  case NEON::BI__builtin_neon_vceqd_u64:
12016  case NEON::BI__builtin_neon_vcgtd_s64:
12017  case NEON::BI__builtin_neon_vcgtd_u64:
12018  case NEON::BI__builtin_neon_vcltd_s64:
12019  case NEON::BI__builtin_neon_vcltd_u64:
12020  case NEON::BI__builtin_neon_vcged_u64:
12021  case NEON::BI__builtin_neon_vcged_s64:
12022  case NEON::BI__builtin_neon_vcled_u64:
12023  case NEON::BI__builtin_neon_vcled_s64: {
12024  llvm::CmpInst::Predicate P;
12025  switch (BuiltinID) {
12026  default: llvm_unreachable("missing builtin ID in switch!");
12027  case NEON::BI__builtin_neon_vceqd_s64:
12028  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12029  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12030  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12031  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12032  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12033  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12034  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12035  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12036  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12037  }
12038  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12039  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12040  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12041  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12042  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12043  }
12044  case NEON::BI__builtin_neon_vtstd_s64:
12045  case NEON::BI__builtin_neon_vtstd_u64: {
12046  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12047  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12048  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12049  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
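  // vtst: AND the operands and test against zero, then sign-extend the i1
  // result so a true test becomes an all-ones i64 mask.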
12050  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12051  llvm::Constant::getNullValue(Int64Ty));
12052  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12053  }
12054  case NEON::BI__builtin_neon_vset_lane_i8:
12055  case NEON::BI__builtin_neon_vset_lane_i16:
12056  case NEON::BI__builtin_neon_vset_lane_i32:
12057  case NEON::BI__builtin_neon_vset_lane_i64:
12058  case NEON::BI__builtin_neon_vset_lane_bf16:
12059  case NEON::BI__builtin_neon_vset_lane_f32:
12060  case NEON::BI__builtin_neon_vsetq_lane_i8:
12061  case NEON::BI__builtin_neon_vsetq_lane_i16:
12062  case NEON::BI__builtin_neon_vsetq_lane_i32:
12063  case NEON::BI__builtin_neon_vsetq_lane_i64:
12064  case NEON::BI__builtin_neon_vsetq_lane_bf16:
12065  case NEON::BI__builtin_neon_vsetq_lane_f32:
12066  Ops.push_back(EmitScalarExpr(E->getArg(2)));
12067  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12068  case NEON::BI__builtin_neon_vset_lane_f64:
12069  // The vector type needs a cast for the v1f64 variant.
12070  Ops[1] =
12071  Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12072  Ops.push_back(EmitScalarExpr(E->getArg(2)));
12073  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12074  case NEON::BI__builtin_neon_vsetq_lane_f64:
12075  // The vector type needs a cast for the v2f64 variant.
12076  Ops[1] =
12077  Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12078  Ops.push_back(EmitScalarExpr(E->getArg(2)));
12079  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12080 
12081  case NEON::BI__builtin_neon_vget_lane_i8:
12082  case NEON::BI__builtin_neon_vdupb_lane_i8:
12083  Ops[0] =
12084  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12085  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12086  "vget_lane");
12087  case NEON::BI__builtin_neon_vgetq_lane_i8:
12088  case NEON::BI__builtin_neon_vdupb_laneq_i8:
12089  Ops[0] =
12090  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12091  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12092  "vgetq_lane");
12093  case NEON::BI__builtin_neon_vget_lane_i16:
12094  case NEON::BI__builtin_neon_vduph_lane_i16:
12095  Ops[0] =
12096  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12097  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12098  "vget_lane");
12099  case NEON::BI__builtin_neon_vgetq_lane_i16:
12100  case NEON::BI__builtin_neon_vduph_laneq_i16:
12101  Ops[0] =
12102  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12103  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12104  "vgetq_lane");
12105  case NEON::BI__builtin_neon_vget_lane_i32:
12106  case NEON::BI__builtin_neon_vdups_lane_i32:
12107  Ops[0] =
12108  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12109  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12110  "vget_lane");
12111  case NEON::BI__builtin_neon_vdups_lane_f32:
12112  Ops[0] =
12113  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12114  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12115  "vdups_lane");
12116  case NEON::BI__builtin_neon_vgetq_lane_i32:
12117  case NEON::BI__builtin_neon_vdups_laneq_i32:
12118  Ops[0] =
12119  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12120  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12121  "vgetq_lane");
12122  case NEON::BI__builtin_neon_vget_lane_i64:
12123  case NEON::BI__builtin_neon_vdupd_lane_i64:
12124  Ops[0] =
12125  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12126  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12127  "vget_lane");
12128  case NEON::BI__builtin_neon_vdupd_lane_f64:
12129  Ops[0] =
12130  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12131  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12132  "vdupd_lane");
12133  case NEON::BI__builtin_neon_vgetq_lane_i64:
12134  case NEON::BI__builtin_neon_vdupd_laneq_i64:
12135  Ops[0] =
12136  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12137  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12138  "vgetq_lane");
12139  case NEON::BI__builtin_neon_vget_lane_f32:
12140  Ops[0] =
12141  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12142  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12143  "vget_lane");
12144  case NEON::BI__builtin_neon_vget_lane_f64:
12145  Ops[0] =
12146  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12147  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12148  "vget_lane");
12149  case NEON::BI__builtin_neon_vgetq_lane_f32:
12150  case NEON::BI__builtin_neon_vdups_laneq_f32:
12151  Ops[0] =
12152  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12153  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12154  "vgetq_lane");
12155  case NEON::BI__builtin_neon_vgetq_lane_f64:
12156  case NEON::BI__builtin_neon_vdupd_laneq_f64:
12157  Ops[0] =
12158  Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12159  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12160  "vgetq_lane");
12161  case NEON::BI__builtin_neon_vaddh_f16:
12162  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12163  return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12164  case NEON::BI__builtin_neon_vsubh_f16:
12165  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12166  return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12167  case NEON::BI__builtin_neon_vmulh_f16:
12168  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12169  return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12170  case NEON::BI__builtin_neon_vdivh_f16:
12171  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12172  return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12173  case NEON::BI__builtin_neon_vfmah_f16:
12174  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12175  return emitCallMaybeConstrainedFPBuiltin(
12176  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12177  {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12178  case NEON::BI__builtin_neon_vfmsh_f16: {
12179  Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12180 
12181  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12182  return emitCallMaybeConstrainedFPBuiltin(
12183  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12184  {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12185  }
12186  case NEON::BI__builtin_neon_vaddd_s64:
12187  case NEON::BI__builtin_neon_vaddd_u64:
12188  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12189  case NEON::BI__builtin_neon_vsubd_s64:
12190  case NEON::BI__builtin_neon_vsubd_u64:
12191  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12192  case NEON::BI__builtin_neon_vqdmlalh_s16:
12193  case NEON::BI__builtin_neon_vqdmlslh_s16: {
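  // There is no scalar 16-bit sqdmull intrinsic, so widen both i16 operands
  // to <4 x i16> vectors, emit a vector sqdmull, and take lane 0 of the
  // <4 x i32> product before the saturating accumulate.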
12194  SmallVector<Value *, 2> ProductOps;
12195  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12196  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12197  auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12198  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12199  ProductOps, "vqdmlXl");
12200  Constant *CI = ConstantInt::get(SizeTy, 0);
12201  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12202 
12203  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12204  ? Intrinsic::aarch64_neon_sqadd
12205  : Intrinsic::aarch64_neon_sqsub;
12206  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12207  }
12208  case NEON::BI__builtin_neon_vqshlud_n_s64: {
12209  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12210  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12211  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12212  Ops, "vqshlu_n");
12213  }
12214  case NEON::BI__builtin_neon_vqshld_n_u64:
12215  case NEON::BI__builtin_neon_vqshld_n_s64: {
12216  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12217  ? Intrinsic::aarch64_neon_uqshl
12218  : Intrinsic::aarch64_neon_sqshl;
12219  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12220  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12221  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12222  }
12223  case NEON::BI__builtin_neon_vrshrd_n_u64:
12224  case NEON::BI__builtin_neon_vrshrd_n_s64: {
12225  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12226  ? Intrinsic::aarch64_neon_urshl
12227  : Intrinsic::aarch64_neon_srshl;
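  // The rounding-shift intrinsics ([us]rshl) take a signed left-shift
  // amount, so a rounding shift right by N is emitted as a shift by -N.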
12228  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12229  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12230  Ops[1] = ConstantInt::get(Int64Ty, -SV);
12231  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12232  }
12233  case NEON::BI__builtin_neon_vrsrad_n_u64:
12234  case NEON::BI__builtin_neon_vrsrad_n_s64: {
12235  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12236  ? Intrinsic::aarch64_neon_urshl
12237  : Intrinsic::aarch64_neon_srshl;
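  // As with vrshrd_n, negate the amount to get a rounding right shift, then
  // add the rounded result into the accumulator.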
12238  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12239  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12240  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12241  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12242  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12243  }
12244  case NEON::BI__builtin_neon_vshld_n_s64:
12245  case NEON::BI__builtin_neon_vshld_n_u64: {
12246  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12247  return Builder.CreateShl(
12248  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12249  }
12250  case NEON::BI__builtin_neon_vshrd_n_s64: {
12251  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
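  // A shift amount equal to the bit width is poison in LLVM IR; clamping an
  // arithmetic shift to 63 is equivalent, since AShr by 63 already
  // saturates to the sign bit.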
12252  return Builder.CreateAShr(
12253  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12254  Amt->getZExtValue())),
12255  "shrd_n");
12256  }
12257  case NEON::BI__builtin_neon_vshrd_n_u64: {
12258  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12259  uint64_t ShiftAmt = Amt->getZExtValue();
12260  // Right-shifting an unsigned value by its size yields 0.
12261  if (ShiftAmt == 64)
12262  return ConstantInt::get(Int64Ty, 0);
12263  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12264  "shrd_n");
12265  }
12266  case NEON::BI__builtin_neon_vsrad_n_s64: {
12267  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12268  Ops[1] = Builder.CreateAShr(
12269  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12270  Amt->getZExtValue())),
12271  "shrd_n");
12272  return Builder.CreateAdd(Ops[0], Ops[1]);
12273  }
12274  case NEON::BI__builtin_neon_vsrad_n_u64: {
12275  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12276  uint64_t ShiftAmt = Amt->getZExtValue();
12277  // Right-shifting an unsigned value by its size yields 0.
12278  // As Op + 0 = Op, return Ops[0] directly.
12279  if (ShiftAmt == 64)
12280  return Ops[0];
12281  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12282  "shrd_n");
12283  return Builder.CreateAdd(Ops[0], Ops[1]);
12284  }
12285  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12286  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12287  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12288  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12289  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12290  "lane");
12291  SmallVector<Value *, 2> ProductOps;
12292  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12293  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12294  auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12295  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12296  ProductOps, "vqdmlXl");
12297  Constant *CI = ConstantInt::get(SizeTy, 0);
12298  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12299  Ops.pop_back();
12300 
12301  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12302  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12303  ? Intrinsic::aarch64_neon_sqadd
12304  : Intrinsic::aarch64_neon_sqsub;
12305  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12306  }
12307  case NEON::BI__builtin_neon_vqdmlals_s32:
12308  case NEON::BI__builtin_neon_vqdmlsls_s32: {
12309  SmallVector<Value *, 2> ProductOps;
12310  ProductOps.push_back(Ops[1]);
12311  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12312  Ops[1] =
12313  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12314  ProductOps, "vqdmlXl");
12315 
12316  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12317  ? Intrinsic::aarch64_neon_sqadd
12318  : Intrinsic::aarch64_neon_sqsub;
12319  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12320  }
12321  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12322  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12323  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12324  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12325  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12326  "lane");
12327  SmallVector<Value *, 2> ProductOps;
12328  ProductOps.push_back(Ops[1]);
12329  ProductOps.push_back(Ops[2]);
12330  Ops[1] =
12331  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12332  ProductOps, "vqdmlXl");
12333  Ops.pop_back();
12334 
12335  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12336  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12337  ? Intrinsic::aarch64_neon_sqadd
12338  : Intrinsic::aarch64_neon_sqsub;
12339  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12340  }
12341  case NEON::BI__builtin_neon_vget_lane_bf16:
12342  case NEON::BI__builtin_neon_vduph_lane_bf16:
12343  case NEON::BI__builtin_neon_vduph_lane_f16: {
12344  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12345  "vget_lane");
12346  }
12347  case NEON::BI__builtin_neon_vgetq_lane_bf16:
12348  case NEON::BI__builtin_neon_vduph_laneq_bf16:
12349  case NEON::BI__builtin_neon_vduph_laneq_f16: {
12350  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12351  "vgetq_lane");
12352  }
12353 
12354  case clang::AArch64::BI_InterlockedAdd:
12355  case clang::AArch64::BI_InterlockedAdd64: {
12356  Address DestAddr = CheckAtomicAlignment(*this, E);
12357  Value *Val = EmitScalarExpr(E->getArg(1));
12358  AtomicRMWInst *RMWI =
12359  Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12360  llvm::AtomicOrdering::SequentiallyConsistent);
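  // The atomicrmw returns the value held before the addition, but
  // _InterlockedAdd returns the new value, so add Val once more.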
12361  return Builder.CreateAdd(RMWI, Val);
12362  }
12363  }
12364 
12365  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12366  llvm::Type *Ty = VTy;
12367  if (!Ty)
12368  return nullptr;
12369 
12370  // Not all intrinsics handled by the common case work for AArch64 yet, so only
12371  // defer to common code if it's been added to our special map.
12372  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12373  AArch64SIMDIntrinsicsProvenSorted);
12374 
12375  if (Builtin)
12376  return EmitCommonNeonBuiltinExpr(
12377  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12378  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12379  /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12380 
12381  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12382  return V;
12383 
12384  unsigned Int;
12385  switch (BuiltinID) {
12386  default: return nullptr;
12387  case NEON::BI__builtin_neon_vbsl_v:
12388  case NEON::BI__builtin_neon_vbslq_v: {
12389  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12390  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12391  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12392  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12393 
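  // Expand the bitwise select as (mask & a) | (~mask & b), done on integer
  // vectors so the same expansion also covers FP element types.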
12394  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12395  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12396  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12397  return Builder.CreateBitCast(Ops[0], Ty);
12398  }
12399  case NEON::BI__builtin_neon_vfma_lane_v:
12400  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12401  // The ARM builtins (and instructions) have the addend as the first
12402  // operand, but the 'fma' intrinsics have it last. Swap it around here.
12403  Value *Addend = Ops[0];
12404  Value *Multiplicand = Ops[1];
12405  Value *LaneSource = Ops[2];
12406  Ops[0] = Multiplicand;
12407  Ops[1] = LaneSource;
12408  Ops[2] = Addend;
12409 
12410  // Now adjust things to handle the lane access.
12411  auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12412  ? llvm::FixedVectorType::get(VTy->getElementType(),
12413  VTy->getNumElements() / 2)
12414  : VTy;
12415  llvm::Constant *cst = cast<Constant>(Ops[3]);
12416  Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12417  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12418  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12419 
12420  Ops.pop_back();
12421  Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12422  : Intrinsic::fma;
12423  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12424  }
12425  case NEON::BI__builtin_neon_vfma_laneq_v: {
12426  auto *VTy = cast<llvm::FixedVectorType>(Ty);
12427  // v1f64 fma should be mapped to Neon scalar f64 fma
12428  if (VTy && VTy->getElementType() == DoubleTy) {
12429  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12430  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12431  llvm::FixedVectorType *VTy =
12432  GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12433  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12434  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12435  Value *Result;
12436  Result = emitCallMaybeConstrainedFPBuiltin(
12437  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12438  DoubleTy, {Ops[1], Ops[2], Ops[0]});
12439  return Builder.CreateBitCast(Result, Ty);
12440  }
12441  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12442  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12443 
12444  auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12445  VTy->getNumElements() * 2);
12446  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12447  Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12448  cast<ConstantInt>(Ops[3]));
12449  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12450 
12451  return emitCallMaybeConstrainedFPBuiltin(
12452  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12453  {Ops[2], Ops[1], Ops[0]});
12454  }
12455  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12456  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12457  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12458 
12459  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12460  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12461  return emitCallMaybeConstrainedFPBuiltin(
12462  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12463  {Ops[2], Ops[1], Ops[0]});
12464  }
12465  case NEON::BI__builtin_neon_vfmah_lane_f16:
12466  case NEON::BI__builtin_neon_vfmas_lane_f32:
12467  case NEON::BI__builtin_neon_vfmah_laneq_f16:
12468  case NEON::BI__builtin_neon_vfmas_laneq_f32:
12469  case NEON::BI__builtin_neon_vfmad_lane_f64:
12470  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12471  Ops.push_back(EmitScalarExpr(E->getArg(3)));
12472  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12473  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12474  return emitCallMaybeConstrainedFPBuiltin(
12475  *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12476  {Ops[1], Ops[2], Ops[0]});
12477  }
12478  case NEON::BI__builtin_neon_vmull_v:
12479  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12480  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12481  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12482  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12483  case NEON::BI__builtin_neon_vmax_v:
12484  case NEON::BI__builtin_neon_vmaxq_v:
12485  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12486  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12487  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12488  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12489  case NEON::BI__builtin_neon_vmaxh_f16: {
12490  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12491  Int = Intrinsic::aarch64_neon_fmax;
12492  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12493  }
12494  case NEON::BI__builtin_neon_vmin_v:
12495  case NEON::BI__builtin_neon_vminq_v:
12496  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12497  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12498  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12499  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12500  case NEON::BI__builtin_neon_vminh_f16: {
12501  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12502  Int = Intrinsic::aarch64_neon_fmin;
12503  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12504  }
12505  case NEON::BI__builtin_neon_vabd_v:
12506  case NEON::BI__builtin_neon_vabdq_v:
12507  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12508  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12509  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12510  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12511  case NEON::BI__builtin_neon_vpadal_v:
12512  case NEON::BI__builtin_neon_vpadalq_v: {
12513  unsigned ArgElts = VTy->getNumElements();
12514  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12515  unsigned BitWidth = EltTy->getBitWidth();
12516  auto *ArgTy = llvm::FixedVectorType::get(
12517  llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12518  llvm::Type* Tys[2] = { VTy, ArgTy };
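  // Pairwise add-accumulate: emit a widening pairwise add ([us]addlp) of
  // the narrow source, then accumulate into Ops[0] with an ordinary
  // vector add.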
12519  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12520  SmallVector<llvm::Value*, 1> TmpOps;
12521  TmpOps.push_back(Ops[1]);
12522  Function *F = CGM.getIntrinsic(Int, Tys);
12523  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12524  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12525  return Builder.CreateAdd(tmp, addend);
12526  }
12527  case NEON::BI__builtin_neon_vpmin_v:
12528  case NEON::BI__builtin_neon_vpminq_v:
12529  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12530  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12531  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12532  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12533  case NEON::BI__builtin_neon_vpmax_v:
12534  case NEON::BI__builtin_neon_vpmaxq_v:
12535  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12536  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12537  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12538  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12539  case NEON::BI__builtin_neon_vminnm_v:
12540  case NEON::BI__builtin_neon_vminnmq_v:
12541  Int = Intrinsic::aarch64_neon_fminnm;
12542  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12543  case NEON::BI__builtin_neon_vminnmh_f16:
12544  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12545  Int = Intrinsic::aarch64_neon_fminnm;
12546  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12547  case NEON::BI__builtin_neon_vmaxnm_v:
12548  case NEON::BI__builtin_neon_vmaxnmq_v:
12549  Int = Intrinsic::aarch64_neon_fmaxnm;
12550  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12551  case NEON::BI__builtin_neon_vmaxnmh_f16:
12552  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12553  Int = Intrinsic::aarch64_neon_fmaxnm;
12554  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12555  case NEON::BI__builtin_neon_vrecpss_f32: {
12556  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12557  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12558  Ops, "vrecps");
12559  }
12560  case NEON::BI__builtin_neon_vrecpsd_f64:
12561  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12562  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12563  Ops, "vrecps");
12564  case NEON::BI__builtin_neon_vrecpsh_f16:
12565  Ops.push_back(EmitScalarExpr(E->getArg(1)));
12566  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12567  Ops, "vrecps");
12568  case NEON::BI__builtin_neon_vqshrun_n_v:
12569  Int = Intrinsic::aarch64_neon_sqshrun;
12570  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12571  case NEON::BI__builtin_neon_vqrshrun_n_v:
12572  Int = Intrinsic::aarch64_neon_sqrshrun;
12573  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12574  case NEON::BI__builtin_neon_vqshrn_n_v:
12575  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12576  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12577  case NEON::BI__builtin_neon_vrshrn_n_v:
12578  Int = Intrinsic::aarch64_neon_rshrn;
12579  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12580  case NEON::BI__builtin_neon_vqrshrn_n_v:
12581  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12582  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12583  case NEON::BI__builtin_neon_vrndah_f16: {
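  // Each rounding builtin below selects the constrained variant of the LLVM
  // intrinsic when strict FP evaluation is in effect, keeping the operation
  // ordered with respect to the dynamic FP environment.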
12584  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12585  Int = Builder.getIsFPConstrained()
12586  ? Intrinsic::experimental_constrained_round
12587  : Intrinsic::round;
12588  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12589  }
12590  case NEON::BI__builtin_neon_vrnda_v:
12591  case NEON::BI__builtin_neon_vrndaq_v: {
12592  Int = Builder.getIsFPConstrained()
12593  ? Intrinsic::experimental_constrained_round
12594  : Intrinsic::round;
12595  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12596  }
12597  case NEON::BI__builtin_neon_vrndih_f16: {
12598  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12599  Int = Builder.getIsFPConstrained()
12600  ? Intrinsic::experimental_constrained_nearbyint
12601  : Intrinsic::nearbyint;
12602  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12603  }
12604  case NEON::BI__builtin_neon_vrndmh_f16: {
12605  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12606  Int = Builder.getIsFPConstrained()
12607  ? Intrinsic::experimental_constrained_floor
12608  : Intrinsic::floor;
12609  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12610  }
12611  case NEON::BI__builtin_neon_vrndm_v:
12612  case NEON::BI__builtin_neon_vrndmq_v: {
12613  Int = Builder.getIsFPConstrained()
12614  ? Intrinsic::experimental_constrained_floor
12615  : Intrinsic::floor;
12616  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12617  }
12618  case NEON::BI__builtin_neon_vrndnh_f16: {
12619  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12620  Int = Builder.getIsFPConstrained()
12621  ? Intrinsic::experimental_constrained_roundeven
12622  : Intrinsic::roundeven;
12623  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12624  }
12625  case NEON::BI__builtin_neon_vrndn_v:
12626  case NEON::BI__builtin_neon_vrndnq_v: {
12627  Int = Builder.getIsFPConstrained()
12628  ? Intrinsic::experimental_constrained_roundeven
12629  : Intrinsic::roundeven;
12630  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12631  }
12632  case NEON::BI__builtin_neon_vrndns_f32: {
12633  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12634  Int = Builder.getIsFPConstrained()
12635  ? Intrinsic::experimental_constrained_roundeven
12636  : Intrinsic::roundeven;
12637  return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12638  }
12639  case NEON::BI__builtin_neon_vrndph_f16: {
12640  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12641  Int = Builder.getIsFPConstrained()
12642  ? Intrinsic::experimental_constrained_ceil
12643  : Intrinsic::ceil;
12644  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12645  }
12646  case NEON::BI__builtin_neon_vrndp_v:
12647  case NEON::BI__builtin_neon_vrndpq_v: {
12648  Int = Builder.getIsFPConstrained()
12649  ? Intrinsic::experimental_constrained_ceil
12650  : Intrinsic::ceil;
12651  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12652  }
12653  case NEON::BI__builtin_neon_vrndxh_f16: {
12654  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12655  Int = Builder.getIsFPConstrained()
12656  ? Intrinsic::experimental_constrained_rint
12657  : Intrinsic::rint;
12658  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12659  }
12660  case NEON::BI__builtin_neon_vrndx_v:
12661  case NEON::BI__builtin_neon_vrndxq_v: {
12662  Int = Builder.getIsFPConstrained()
12663  ? Intrinsic::experimental_constrained_rint
12664  : Intrinsic::rint;
12665  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12666  }
12667  case NEON::BI__builtin_neon_vrndh_f16: {
12668  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12669  Int = Builder.getIsFPConstrained()
12670  ? Intrinsic::experimental_constrained_trunc
12671  : Intrinsic::trunc;
12672  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12673  }
12674  case NEON::BI__builtin_neon_vrnd32x_f32:
12675  case NEON::BI__builtin_neon_vrnd32xq_f32:
12676  case NEON::BI__builtin_neon_vrnd32x_f64:
12677  case NEON::BI__builtin_neon_vrnd32xq_f64: {
12678  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12679  Int = Intrinsic::aarch64_neon_frint32x;
12680  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12681  }
12682  case NEON::BI__builtin_neon_vrnd32z_f32:
12683  case NEON::BI__builtin_neon_vrnd32zq_f32:
12684  case NEON::BI__builtin_neon_vrnd32z_f64:
12685  case NEON::BI__builtin_neon_vrnd32zq_f64: {
12686  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12687  Int = Intrinsic::aarch64_neon_frint32z;
12688  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12689  }
12690  case NEON::BI__builtin_neon_vrnd64x_f32:
12691  case NEON::BI__builtin_neon_vrnd64xq_f32:
12692  case NEON::BI__builtin_neon_vrnd64x_f64:
12693  case NEON::BI__builtin_neon_vrnd64xq_f64: {
12694  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12695  Int = Intrinsic::aarch64_neon_frint64x;
12696  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12697  }
12698  case NEON::BI__builtin_neon_vrnd64z_f32:
12699  case NEON::BI__builtin_neon_vrnd64zq_f32:
12700  case NEON::BI__builtin_neon_vrnd64z_f64:
12701  case NEON::BI__builtin_neon_vrnd64zq_f64: {
12702  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12703  Int = Intrinsic::aarch64_neon_frint64z;
12704  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12705  }
12706  case NEON::BI__builtin_neon_vrnd_v:
12707  case NEON::BI__builtin_neon_vrndq_v: {
12708  Int = Builder.getIsFPConstrained()
12709  ? Intrinsic::experimental_constrained_trunc
12710  : Intrinsic::trunc;
12711  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12712  }
12713  case NEON::BI__builtin_neon_vcvt_f64_v:
12714  case NEON::BI__builtin_neon_vcvtq_f64_v:
12715  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12716  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12717  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12718  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12719  case NEON::BI__builtin_neon_vcvt_f64_f32: {
12720  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12721  "unexpected vcvt_f64_f32 builtin");
12722  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12723  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12724 
12725  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12726  }
12727  case NEON::BI__builtin_neon_vcvt_f32_f64: {
12728  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12729  "unexpected vcvt_f32_f64 builtin");
12730  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12731  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12732 
12733  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12734  }
12735  case NEON::BI__builtin_neon_vcvt_s32_v:
12736  case NEON::BI__builtin_neon_vcvt_u32_v:
12737  case NEON::BI__builtin_neon_vcvt_s64_v:
12738  case NEON::BI__builtin_neon_vcvt_u64_v:
12739  case NEON::BI__builtin_neon_vcvt_s16_f16:
12740  case NEON::BI__builtin_neon_vcvt_u16_f16:
12741  case NEON::BI__builtin_neon_vcvtq_s32_v:
12742  case NEON::BI__builtin_neon_vcvtq_u32_v:
12743  case NEON::BI__builtin_neon_vcvtq_s64_v:
12744  case NEON::BI__builtin_neon_vcvtq_u64_v:
12745  case NEON::BI__builtin_neon_vcvtq_s16_f16:
12746  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
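  // The plain float-to-integer conversions round toward zero, matching the
  // FCVTZS/FCVTZU instructions.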
12747  Int =
12748  usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12749  llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12750  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12751  }
12752  case NEON::BI__builtin_neon_vcvta_s16_f16:
12753  case NEON::BI__builtin_neon_vcvta_u16_f16:
12754  case NEON::BI__builtin_neon_vcvta_s32_v:
12755  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12756  case NEON::BI__builtin_neon_vcvtaq_s32_v:
12757  case NEON::BI__builtin_neon_vcvta_u32_v:
12758  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12759  case NEON::BI__builtin_neon_vcvtaq_u32_v:
12760  case NEON::BI__builtin_neon_vcvta_s64_v:
12761  case NEON::BI__builtin_neon_vcvtaq_s64_v:
12762  case NEON::BI__builtin_neon_vcvta_u64_v:
12763  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12764  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12765  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12766  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12767  }
12768  case NEON::BI__builtin_neon_vcvtm_s16_f16:
12769  case NEON::BI__builtin_neon_vcvtm_s32_v:
12770  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12771  case NEON::BI__builtin_neon_vcvtmq_s32_v:
12772  case NEON::BI__builtin_neon_vcvtm_u16_f16:
12773  case NEON::BI__builtin_neon_vcvtm_u32_v:
12774  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12775  case NEON::BI__builtin_neon_vcvtmq_u32_v:
12776  case NEON::BI__builtin_neon_vcvtm_s64_v:
12777  case NEON::BI__builtin_neon_vcvtmq_s64_v:
12778  case NEON::BI__builtin_neon_vcvtm_u64_v:
12779  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12780  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12781  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12782  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12783  }
12784  case NEON::BI__builtin_neon_vcvtn_s16_f16:
12785  case NEON::BI__builtin_neon_vcvtn_s32_v:
12786  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12787  case NEON::BI__builtin_neon_vcvtnq_s32_v:
12788  case NEON::BI__builtin_neon_vcvtn_u16_f16:
12789  case NEON::BI__builtin_neon_vcvtn_u32_v:
12790  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12791  case NEON::BI__builtin_neon_vcvtnq_u32_v:
12792  case NEON::BI__builtin_neon_vcvtn_s64_v:
12793  case NEON::BI__builtin_neon_vcvtnq_s64_v:
12794  case NEON::BI__builtin_neon_vcvtn_u64_v:
12795  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12796  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12797  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12798  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12799  }
12800  case NEON::BI__builtin_neon_vcvtp_s16_f16:
12801  case NEON::BI__builtin_neon_vcvtp_s32_v:
12802  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12803  case NEON::BI__builtin_neon_vcvtpq_s32_v:
12804  case NEON::BI__builtin_neon_vcvtp_u16_f16:
12805  case NEON::BI__builtin_neon_vcvtp_u32_v:
12806  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12807  case NEON::BI__builtin_neon_vcvtpq_u32_v:
12808  case NEON::BI__builtin_neon_vcvtp_s64_v:
12809  case NEON::BI__builtin_neon_vcvtpq_s64_v:
12810  case NEON::BI__builtin_neon_vcvtp_u64_v:
12811  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12812  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12813  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12814  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12815  }
12816  case NEON::BI__builtin_neon_vmulx_v:
12817  case NEON::BI__builtin_neon_vmulxq_v: {
12818  Int = Intrinsic::aarch64_neon_fmulx;
12819  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12820  }
12821  case NEON::BI__builtin_neon_vmulxh_lane_f16:
12822  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12823  // vmulx_lane should be mapped to Neon scalar mulx after
12824  // extracting the scalar element
12825  Ops.push_back(EmitScalarExpr(E->getArg(2)));
12826  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12827  Ops.pop_back();
12828  Int = Intrinsic::aarch64_neon_fmulx;
12829  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12830  }
12831  case NEON::BI__builtin_neon_vmul_lane_v:
12832  case NEON::BI__builtin_neon_vmul_laneq_v: {
12833  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12834  bool Quad = false;
12835  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12836  Quad = true;
12837  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12838  llvm::FixedVectorType *VTy =
12839  GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12840  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12841  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12842  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12843  return Builder.CreateBitCast(Result, Ty);
12844  }
12845  case NEON::BI__builtin_neon_vnegd_s64:
12846  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12847  case NEON::BI__builtin_neon_vnegh_f16:
12848  return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12849  case NEON::BI__builtin_neon_vpmaxnm_v:
12850  case NEON::BI__builtin_neon_vpmaxnmq_v: {
12851  Int = Intrinsic::aarch64_neon_fmaxnmp;
12852  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12853  }
12854  case NEON::BI__builtin_neon_vpminnm_v:
12855  case NEON::BI__builtin_neon_vpminnmq_v: {
12856  Int = Intrinsic::aarch64_neon_fminnmp;
12857  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12858  }
12859  case NEON::BI__builtin_neon_vsqrth_f16: {
12860  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12861  Int = Builder.getIsFPConstrained()
12862  ? Intrinsic::experimental_constrained_sqrt
12863  : Intrinsic::sqrt;
12864  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12865  }
12866  case NEON::BI__builtin_neon_vsqrt_v:
12867  case NEON::BI__builtin_neon_vsqrtq_v: {
12868  Int = Builder.getIsFPConstrained()
12869  ? Intrinsic::experimental_constrained_sqrt
12870  : Intrinsic::sqrt;
12871  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12872  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12873  }
12874  case NEON::BI__builtin_neon_vrbit_v:
12875  case NEON::BI__builtin_neon_vrbitq_v: {
12876  Int = Intrinsic::bitreverse;
12877  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12878  }
12879  case NEON::BI__builtin_neon_vaddv_u8:
12880  // FIXME: These are handled by the AArch64 scalar code.
12881  usgn = true;
12882  [[fallthrough]];
12883  case NEON::BI__builtin_neon_vaddv_s8: {
12884  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12885  Ty = Int32Ty;
12886  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12887  llvm::Type *Tys[2] = { Ty, VTy };
12888  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12889  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
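  // The reduction is computed in i32 (the intrinsic's return type here);
  // truncate back to the element width the builtin returns.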
12890  return Builder.CreateTrunc(Ops[0], Int8Ty);
12891  }
12892  case NEON::BI__builtin_neon_vaddv_u16:
12893  usgn = true;
12894  [[fallthrough]];
12895  case NEON::BI__builtin_neon_vaddv_s16: {
12896  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12897  Ty = Int32Ty;
12898  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12899  llvm::Type *Tys[2] = { Ty, VTy };
12900  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12901  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12902  return Builder.CreateTrunc(Ops[0], Int16Ty);
12903  }
12904  case NEON::BI__builtin_neon_vaddvq_u8:
12905  usgn = true;
12906  [[fallthrough]];
12907  case NEON::BI__builtin_neon_vaddvq_s8: {
12908  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12909  Ty = Int32Ty;
12910  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12911  llvm::Type *Tys[2] = { Ty, VTy };
12912  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12913  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12914  return Builder.CreateTrunc(Ops[0], Int8Ty);
12915  }
12916  case NEON::BI__builtin_neon_vaddvq_u16:
12917  usgn = true;
12918  [[fallthrough]];
12919  case NEON::BI__builtin_neon_vaddvq_s16: {
12920  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12921  Ty = Int32Ty;
12922  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12923  llvm::Type *Tys[2] = { Ty, VTy };
12924  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12925  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12926  return Builder.CreateTrunc(Ops[0], Int16Ty);
12927  }
12928  case NEON::BI__builtin_neon_vmaxv_u8: {
12929  Int = Intrinsic::aarch64_neon_umaxv;
12930  Ty = Int32Ty;
12931  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12932  llvm::Type *Tys[2] = { Ty, VTy };
12933  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12934  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12935  return Builder.CreateTrunc(Ops[0], Int8Ty);
12936  }
12937  case NEON::BI__builtin_neon_vmaxv_u16: {
12938  Int = Intrinsic::aarch64_neon_umaxv;
12939  Ty = Int32Ty;
12940  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12941  llvm::Type *Tys[2] = { Ty, VTy };
12942  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12943  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12944  return Builder.CreateTrunc(Ops[0], Int16Ty);
12945  }
12946  case NEON::BI__builtin_neon_vmaxvq_u8: {
12947  Int = Intrinsic::aarch64_neon_umaxv;
12948  Ty = Int32Ty;
12949  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12950  llvm::Type *Tys[2] = { Ty, VTy };
12951  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12952  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12953  return Builder.CreateTrunc(Ops[0], Int8Ty);
12954  }
12955  case NEON::BI__builtin_neon_vmaxvq_u16: {
12956  Int = Intrinsic::aarch64_neon_umaxv;
12957  Ty = Int32Ty;
12958  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12959  llvm::Type *Tys[2] = { Ty, VTy };
12960  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12961  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12962  return Builder.CreateTrunc(Ops[0], Int16Ty);
12963  }
12964  case NEON::BI__builtin_neon_vmaxv_s8: {
12965  Int = Intrinsic::aarch64_neon_smaxv;
12966  Ty = Int32Ty;
12967  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12968  llvm::Type *Tys[2] = { Ty, VTy };
12969  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12970  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12971  return Builder.CreateTrunc(Ops[0], Int8Ty);
12972  }
12973  case NEON::BI__builtin_neon_vmaxv_s16: {
12974  Int = Intrinsic::aarch64_neon_smaxv;
12975  Ty = Int32Ty;
12976  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12977  llvm::Type *Tys[2] = { Ty, VTy };
12978  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12979  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12980  return Builder.CreateTrunc(Ops[0], Int16Ty);
12981  }
12982  case NEON::BI__builtin_neon_vmaxvq_s8: {
12983  Int = Intrinsic::aarch64_neon_smaxv;
12984  Ty = Int32Ty;
12985  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12986  llvm::Type *Tys[2] = { Ty, VTy };
12987  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12988  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12989  return Builder.CreateTrunc(Ops[0], Int8Ty);
12990  }
12991  case NEON::BI__builtin_neon_vmaxvq_s16: {
12992  Int = Intrinsic::aarch64_neon_smaxv;
12993  Ty = Int32Ty;
12994  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12995  llvm::Type *Tys[2] = { Ty, VTy };
12996  Ops.push_back(EmitScalarExpr(E->getArg(0)));
12997  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12998  return Builder.CreateTrunc(Ops[0], Int16Ty);
12999  }
13000  case NEON::BI__builtin_neon_vmaxv_f16: {
13001  Int = Intrinsic::aarch64_neon_fmaxv;
13002  Ty = HalfTy;
13003  VTy = llvm::FixedVectorType::get(HalfTy, 4);
13004  llvm::Type *Tys[2] = { Ty, VTy };
13005  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13006  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13007  return Builder.CreateTrunc(Ops[0], HalfTy);
13008  }
13009  case NEON::BI__builtin_neon_vmaxvq_f16: {
13010  Int = Intrinsic::aarch64_neon_fmaxv;
13011  Ty = HalfTy;
13012  VTy = llvm::FixedVectorType::get(HalfTy, 8);
13013  llvm::Type *Tys[2] = { Ty, VTy };
13014  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13015  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13016  return Builder.CreateTrunc(Ops[0], HalfTy);
13017  }
13018  case NEON::BI__builtin_neon_vminv_u8: {
13019  Int = Intrinsic::aarch64_neon_uminv;
13020  Ty = Int32Ty;
13021  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13022  llvm::Type *Tys[2] = { Ty, VTy };
13023  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13024  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13025  return Builder.CreateTrunc(Ops[0], Int8Ty);
13026  }
13027  case NEON::BI__builtin_neon_vminv_u16: {
13028  Int = Intrinsic::aarch64_neon_uminv;
13029  Ty = Int32Ty;
13030  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13031  llvm::Type *Tys[2] = { Ty, VTy };
13032  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13033  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13034  return Builder.CreateTrunc(Ops[0], Int16Ty);
13035  }
13036  case NEON::BI__builtin_neon_vminvq_u8: {
13037  Int = Intrinsic::aarch64_neon_uminv;
13038  Ty = Int32Ty;
13039  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13040  llvm::Type *Tys[2] = { Ty, VTy };
13041  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13042  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13043  return Builder.CreateTrunc(Ops[0], Int8Ty);
13044  }
13045  case NEON::BI__builtin_neon_vminvq_u16: {
13046  Int = Intrinsic::aarch64_neon_uminv;
13047  Ty = Int32Ty;
13048  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13049  llvm::Type *Tys[2] = { Ty, VTy };
13050  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13051  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13052  return Builder.CreateTrunc(Ops[0], Int16Ty);
13053  }
13054  case NEON::BI__builtin_neon_vminv_s8: {
13055  Int = Intrinsic::aarch64_neon_sminv;
13056  Ty = Int32Ty;
13057  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13058  llvm::Type *Tys[2] = { Ty, VTy };
13059  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13060  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13061  return Builder.CreateTrunc(Ops[0], Int8Ty);
13062  }
13063  case NEON::BI__builtin_neon_vminv_s16: {
13064  Int = Intrinsic::aarch64_neon_sminv;
13065  Ty = Int32Ty;
13066  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13067  llvm::Type *Tys[2] = { Ty, VTy };
13068  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13069  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13070  return Builder.CreateTrunc(Ops[0], Int16Ty);
13071  }
13072  case NEON::BI__builtin_neon_vminvq_s8: {
13073  Int = Intrinsic::aarch64_neon_sminv;
13074  Ty = Int32Ty;
13075  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13076  llvm::Type *Tys[2] = { Ty, VTy };
13077  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13078  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13079  return Builder.CreateTrunc(Ops[0], Int8Ty);
13080  }
13081  case NEON::BI__builtin_neon_vminvq_s16: {
13082  Int = Intrinsic::aarch64_neon_sminv;
13083  Ty = Int32Ty;
13084  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13085  llvm::Type *Tys[2] = { Ty, VTy };
13086  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13087  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13088  return Builder.CreateTrunc(Ops[0], Int16Ty);
13089  }
13090  case NEON::BI__builtin_neon_vminv_f16: {
13091  Int = Intrinsic::aarch64_neon_fminv;
13092  Ty = HalfTy;
13093  VTy = llvm::FixedVectorType::get(HalfTy, 4);
13094  llvm::Type *Tys[2] = { Ty, VTy };
13095  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13096  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13097  return Builder.CreateTrunc(Ops[0], HalfTy);
13098  }
13099  case NEON::BI__builtin_neon_vminvq_f16: {
13100  Int = Intrinsic::aarch64_neon_fminv;
13101  Ty = HalfTy;
13102  VTy = llvm::FixedVectorType::get(HalfTy, 8);
13103  llvm::Type *Tys[2] = { Ty, VTy };
13104  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13105  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13106  return Builder.CreateTrunc(Ops[0], HalfTy);
13107  }
13108  case NEON::BI__builtin_neon_vmaxnmv_f16: {
13109  Int = Intrinsic::aarch64_neon_fmaxnmv;
13110  Ty = HalfTy;
13111  VTy = llvm::FixedVectorType::get(HalfTy, 4);
13112  llvm::Type *Tys[2] = { Ty, VTy };
13113  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13114  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13115  return Builder.CreateTrunc(Ops[0], HalfTy);
13116  }
13117  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13118  Int = Intrinsic::aarch64_neon_fmaxnmv;
13119  Ty = HalfTy;
13120  VTy = llvm::FixedVectorType::get(HalfTy, 8);
13121  llvm::Type *Tys[2] = { Ty, VTy };
13122  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13123  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13124  return Builder.CreateTrunc(Ops[0], HalfTy);
13125  }
13126  case NEON::BI__builtin_neon_vminnmv_f16: {
13127  Int = Intrinsic::aarch64_neon_fminnmv;
13128  Ty = HalfTy;
13129  VTy = llvm::FixedVectorType::get(HalfTy, 4);
13130  llvm::Type *Tys[2] = { Ty, VTy };
13131  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13132  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13133  return Builder.CreateTrunc(Ops[0], HalfTy);
13134  }
13135  case NEON::BI__builtin_neon_vminnmvq_f16: {
13136  Int = Intrinsic::aarch64_neon_fminnmv;
13137  Ty = HalfTy;
13138  VTy = llvm::FixedVectorType::get(HalfTy, 8);
13139  llvm::Type *Tys[2] = { Ty, VTy };
13140  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13141  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13142  return Builder.CreateTrunc(Ops[0], HalfTy);
13143  }
13144  case NEON::BI__builtin_neon_vmul_n_f64: {
13145  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13146  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13147  return Builder.CreateFMul(Ops[0], RHS);
13148  }
13149  case NEON::BI__builtin_neon_vaddlv_u8: {
13150  Int = Intrinsic::aarch64_neon_uaddlv;
13151  Ty = Int32Ty;
13152  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13153  llvm::Type *Tys[2] = { Ty, VTy };
13154  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13155  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13156  return Builder.CreateTrunc(Ops[0], Int16Ty);
13157  }
13158  case NEON::BI__builtin_neon_vaddlv_u16: {
13159  Int = Intrinsic::aarch64_neon_uaddlv;
13160  Ty = Int32Ty;
13161  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13162  llvm::Type *Tys[2] = { Ty, VTy };
13163  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13164  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13165  }
13166  case NEON::BI__builtin_neon_vaddlvq_u8: {
13167  Int = Intrinsic::aarch64_neon_uaddlv;
13168  Ty = Int32Ty;
13169  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13170  llvm::Type *Tys[2] = { Ty, VTy };
13171  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13172  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13173  return Builder.CreateTrunc(Ops[0], Int16Ty);
13174  }
13175  case NEON::BI__builtin_neon_vaddlvq_u16: {
13176  Int = Intrinsic::aarch64_neon_uaddlv;
13177  Ty = Int32Ty;
13178  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13179  llvm::Type *Tys[2] = { Ty, VTy };
13180  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13181  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13182  }
13183  case NEON::BI__builtin_neon_vaddlv_s8: {
13184  Int = Intrinsic::aarch64_neon_saddlv;
13185  Ty = Int32Ty;
13186  VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13187  llvm::Type *Tys[2] = { Ty, VTy };
13188  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13189  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13190  return Builder.CreateTrunc(Ops[0], Int16Ty);
13191  }
13192  case NEON::BI__builtin_neon_vaddlv_s16: {
13193  Int = Intrinsic::aarch64_neon_saddlv;
13194  Ty = Int32Ty;
13195  VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13196  llvm::Type *Tys[2] = { Ty, VTy };
13197  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13198  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13199  }
13200  case NEON::BI__builtin_neon_vaddlvq_s8: {
13201  Int = Intrinsic::aarch64_neon_saddlv;
13202  Ty = Int32Ty;
13203  VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13204  llvm::Type *Tys[2] = { Ty, VTy };
13205  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13206  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13207  return Builder.CreateTrunc(Ops[0], Int16Ty);
13208  }
13209  case NEON::BI__builtin_neon_vaddlvq_s16: {
13210  Int = Intrinsic::aarch64_neon_saddlv;
13211  Ty = Int32Ty;
13212  VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13213  llvm::Type *Tys[2] = { Ty, VTy };
13214  Ops.push_back(EmitScalarExpr(E->getArg(0)));
13215  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13216  }
13217  case NEON::BI__builtin_neon_vsri_n_v:
13218  case NEON::BI__builtin_neon_vsriq_n_v: {
13219  Int = Intrinsic::aarch64_neon_vsri;
13220  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13221  return EmitNeonCall(Intrin, Ops, "vsri_n");
13222  }
13223  case NEON::BI__builtin_neon_vsli_n_v:
13224  case NEON::BI__builtin_neon_vsliq_n_v: {
13225  Int = Intrinsic::aarch64_neon_vsli;
13226  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13227  return EmitNeonCall(Intrin, Ops, "vsli_n");
13228  }
13229  case NEON::BI__builtin_neon_vsra_n_v:
13230  case NEON::BI__builtin_neon_vsraq_n_v:
13231  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13232  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13233  return Builder.CreateAdd(Ops[0], Ops[1]);
13234  case NEON::BI__builtin_neon_vrsra_n_v:
13235  case NEON::BI__builtin_neon_vrsraq_n_v: {
13236  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13237  SmallVector<llvm::Value*,2> TmpOps;
13238  TmpOps.push_back(Ops[1]);
13239  TmpOps.push_back(Ops[2]);
13240  Function* F = CGM.getIntrinsic(Int, Ty);
13241  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13242  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13243  return Builder.CreateAdd(Ops[0], tmp);
13244  }
13245  case NEON::BI__builtin_neon_vld1_v:
13246  case NEON::BI__builtin_neon_vld1q_v: {
13247  return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13248  }
13249  case NEON::BI__builtin_neon_vst1_v:
13250  case NEON::BI__builtin_neon_vst1q_v:
13251  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13252  return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13253  case NEON::BI__builtin_neon_vld1_lane_v:
13254  case NEON::BI__builtin_neon_vld1q_lane_v: {
13255  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13256  Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13257  PtrOp0.getAlignment());
13258  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13259  }
13260  case NEON::BI__builtin_neon_vldap1_lane_s64:
13261  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13262  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13263  llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13264  VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13265  LI->setAtomic(llvm::AtomicOrdering::Acquire);
13266  Ops[0] = LI;
13267  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13268  }
13269  case NEON::BI__builtin_neon_vld1_dup_v:
13270  case NEON::BI__builtin_neon_vld1q_dup_v: {
13271  Value *V = PoisonValue::get(Ty);
13272  Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13273  PtrOp0.getAlignment());
13274  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13275  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13276  return EmitNeonSplat(Ops[0], CI);
13277  }
13278  case NEON::BI__builtin_neon_vst1_lane_v:
13279  case NEON::BI__builtin_neon_vst1q_lane_v:
13280  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13281  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13282  return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13283  case NEON::BI__builtin_neon_vstl1_lane_s64:
13284  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13285  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13286  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13287  llvm::StoreInst *SI =
13288  Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13289  SI->setAtomic(llvm::AtomicOrdering::Release);
13290  return SI;
13291  }
13292  case NEON::BI__builtin_neon_vld2_v:
13293  case NEON::BI__builtin_neon_vld2q_v: {
13294  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13295  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13296  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13297  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13298  }
13299  case NEON::BI__builtin_neon_vld3_v:
13300  case NEON::BI__builtin_neon_vld3q_v: {
13301  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13302  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13303  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13304  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13305  }
13306  case NEON::BI__builtin_neon_vld4_v:
13307  case NEON::BI__builtin_neon_vld4q_v: {
13308  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13309  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13310  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13311  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13312  }
13313  case NEON::BI__builtin_neon_vld2_dup_v:
13314  case NEON::BI__builtin_neon_vld2q_dup_v: {
13315  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13316  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13317  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13318  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13319  }
13320  case NEON::BI__builtin_neon_vld3_dup_v:
13321  case NEON::BI__builtin_neon_vld3q_dup_v: {
13322  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13323  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13324  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13325  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13326  }
13327  case NEON::BI__builtin_neon_vld4_dup_v:
13328  case NEON::BI__builtin_neon_vld4q_dup_v: {
13329  llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13330  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13331  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13332  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13333  }
13334  case NEON::BI__builtin_neon_vld2_lane_v:
13335  case NEON::BI__builtin_neon_vld2q_lane_v: {
13336  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13337  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13338  std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13339  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13340  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13341  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13342  Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13343  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13344  }
13345  case NEON::BI__builtin_neon_vld3_lane_v:
13346  case NEON::BI__builtin_neon_vld3q_lane_v: {
13347  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13348  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13349  std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13350  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13351  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13352  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13353  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13354  Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13355  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13356  }
13357  case NEON::BI__builtin_neon_vld4_lane_v:
13358  case NEON::BI__builtin_neon_vld4q_lane_v: {
13359  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13360  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13361  std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13362  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13363  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13364  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13365  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13366  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13367  Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13368  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13369  }
13370  case NEON::BI__builtin_neon_vst2_v:
13371  case NEON::BI__builtin_neon_vst2q_v: {
13372  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13373  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13374  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13375  Ops, "");
13376  }
13377  case NEON::BI__builtin_neon_vst2_lane_v:
13378  case NEON::BI__builtin_neon_vst2q_lane_v: {
13379  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13380  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13381  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13382  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13383  Ops, "");
13384  }
13385  case NEON::BI__builtin_neon_vst3_v:
13386  case NEON::BI__builtin_neon_vst3q_v: {
13387  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13388  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13389  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13390  Ops, "");
13391  }
13392  case NEON::BI__builtin_neon_vst3_lane_v:
13393  case NEON::BI__builtin_neon_vst3q_lane_v: {
13394  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13395  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13396  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13397  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13398  Ops, "");
13399  }
13400  case NEON::BI__builtin_neon_vst4_v:
13401  case NEON::BI__builtin_neon_vst4q_v: {
13402  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13403  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13404  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13405  Ops, "");
13406  }
13407  case NEON::BI__builtin_neon_vst4_lane_v:
13408  case NEON::BI__builtin_neon_vst4q_lane_v: {
13409  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13410  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13411  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13412  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13413  Ops, "");
13414  }
13415  case NEON::BI__builtin_neon_vtrn_v:
13416  case NEON::BI__builtin_neon_vtrnq_v: {
13417  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13418  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13419  Value *SV = nullptr;
13420 
13421  for (unsigned vi = 0; vi != 2; ++vi) {
13422  SmallVector<int, 16> Indices;
13423  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13424  Indices.push_back(i+vi);
13425  Indices.push_back(i+e+vi);
13426  }
13427  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13428  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13429  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13430  }
13431  return SV;
13432  }
13433  case NEON::BI__builtin_neon_vuzp_v:
13434  case NEON::BI__builtin_neon_vuzpq_v: {
13435  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13436  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13437  Value *SV = nullptr;
13438 
13439  for (unsigned vi = 0; vi != 2; ++vi) {
13440  SmallVector<int, 16> Indices;
13441  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13442  Indices.push_back(2*i+vi);
13443 
13444  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13445  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13446  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13447  }
13448  return SV;
13449  }
13450  case NEON::BI__builtin_neon_vzip_v:
13451  case NEON::BI__builtin_neon_vzipq_v: {
13452  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13453  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13454  Value *SV = nullptr;
13455 
13456  for (unsigned vi = 0; vi != 2; ++vi) {
13457  SmallVector<int, 16> Indices;
13458  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13459  Indices.push_back((i + vi*e) >> 1);
13460  Indices.push_back(((i + vi*e) >> 1)+e);
13461  }
13462  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13463  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13464  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13465  }
13466  return SV;
13467  }
13468  case NEON::BI__builtin_neon_vqtbl1q_v: {
13469  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13470  Ops, "vtbl1");
13471  }
13472  case NEON::BI__builtin_neon_vqtbl2q_v: {
13473  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13474  Ops, "vtbl2");
13475  }
13476  case NEON::BI__builtin_neon_vqtbl3q_v: {
13477  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13478  Ops, "vtbl3");
13479  }
13480  case NEON::BI__builtin_neon_vqtbl4q_v: {
13481  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13482  Ops, "vtbl4");
13483  }
13484  case NEON::BI__builtin_neon_vqtbx1q_v: {
13485  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13486  Ops, "vtbx1");
13487  }
13488  case NEON::BI__builtin_neon_vqtbx2q_v: {
13489  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13490  Ops, "vtbx2");
13491  }
13492  case NEON::BI__builtin_neon_vqtbx3q_v: {
13493  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13494  Ops, "vtbx3");
13495  }
13496  case NEON::BI__builtin_neon_vqtbx4q_v: {
13497  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13498  Ops, "vtbx4");
13499  }
13500  case NEON::BI__builtin_neon_vsqadd_v:
13501  case NEON::BI__builtin_neon_vsqaddq_v: {
13502  Int = Intrinsic::aarch64_neon_usqadd;
13503  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13504  }
13505  case NEON::BI__builtin_neon_vuqadd_v:
13506  case NEON::BI__builtin_neon_vuqaddq_v: {
13507  Int = Intrinsic::aarch64_neon_suqadd;
13508  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13509  }
13510  }
13511 }
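// A minimal usage sketch (illustrative, not part of this file): a source-level
// call to the ACLE intrinsic vaddlv_u8 reaches the BI__builtin_neon_vaddlv_u8
// case above and is lowered to the aarch64.neon.uaddlv intrinsic, whose i32
// result is truncated back to the narrower C return type.
//
//   #include <arm_neon.h>
//   uint16_t widen_and_sum(uint8x8_t v) {
//     return vaddlv_u8(v); // -> @llvm.aarch64.neon.uaddlv.i32.v8i8 + trunc to i16
//   }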
13512 
13513 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13514  const CallExpr *E) {
13515  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13516  BuiltinID == BPF::BI__builtin_btf_type_id ||
13517  BuiltinID == BPF::BI__builtin_preserve_type_info ||
13518  BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13519  "unexpected BPF builtin");
13520 
13521  // A sequence number, injected into IR builtin functions, to
13522  // prevent CSE when the only difference between two calls
13523  // may be the debuginfo metadata.
13524  static uint32_t BuiltinSeqNum;
13525 
13526  switch (BuiltinID) {
13527  default:
13528  llvm_unreachable("Unexpected BPF builtin");
13529  case BPF::BI__builtin_preserve_field_info: {
13530  const Expr *Arg = E->getArg(0);
13531  bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13532 
13533  if (!getDebugInfo()) {
13534  CGM.Error(E->getExprLoc(),
13535  "using __builtin_preserve_field_info() without -g");
13536  return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13537  : EmitLValue(Arg).emitRawPointer(*this);
13538  }
13539 
13540  // Enable underlying preserve_*_access_index() generation.
13541  bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13542  IsInPreservedAIRegion = true;
13543  Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13544  : EmitLValue(Arg).emitRawPointer(*this);
13545  IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13546 
13547  ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13548  Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13549 
13550  // Build the IR for the preserve_field_info intrinsic.
13551  llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13552  &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13553  {FieldAddr->getType()});
13554  return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13555  }
13556  case BPF::BI__builtin_btf_type_id:
13557  case BPF::BI__builtin_preserve_type_info: {
13558  if (!getDebugInfo()) {
13559  CGM.Error(E->getExprLoc(), "using builtin function without -g");
13560  return nullptr;
13561  }
13562 
13563  const Expr *Arg0 = E->getArg(0);
13564  llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13565  Arg0->getType(), Arg0->getExprLoc());
13566 
13567  ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13568  Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13569  Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13570 
13571  llvm::Function *FnDecl;
13572  if (BuiltinID == BPF::BI__builtin_btf_type_id)
13573  FnDecl = llvm::Intrinsic::getDeclaration(
13574  &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13575  else
13576  FnDecl = llvm::Intrinsic::getDeclaration(
13577  &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13578  CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13579  Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13580  return Fn;
13581  }
13582  case BPF::BI__builtin_preserve_enum_value: {
13583  if (!getDebugInfo()) {
13584  CGM.Error(E->getExprLoc(), "using builtin function without -g");
13585  return nullptr;
13586  }
13587 
13588  const Expr *Arg0 = E->getArg(0);
13589  llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13590  Arg0->getType(), Arg0->getExprLoc());
13591 
13592  // Find enumerator
13593  const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13594  const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13595  const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13596  const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13597 
13598  auto InitVal = Enumerator->getInitVal();
13599  std::string InitValStr;
13600  if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13601  InitValStr = std::to_string(InitVal.getSExtValue());
13602  else
13603  InitValStr = std::to_string(InitVal.getZExtValue());
13604  std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13605  Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13606 
13607  ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13608  Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13609  Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13610 
13611  llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13612  &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13613  CallInst *Fn =
13614  Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13615  Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13616  return Fn;
13617  }
13618  }
13619 }
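// A hedged usage sketch (illustrative; the struct and field names are made
// up): __builtin_preserve_field_info() is the CO-RE relocation hook used by
// BPF programs, which must be compiled with -g so the debuginfo checked above
// exists.
//
//   struct task { int pid; };
//   unsigned long field_byte_offset(struct task *t) {
//     // Kind 0 requests the field's byte offset; this becomes a call to
//     // @llvm.bpf.preserve.field.info on the preserved field address.
//     return __builtin_preserve_field_info(t->pid, 0);
//   }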
13620 
13621 llvm::Value *CodeGenFunction::
13622 BuildVector(ArrayRef<llvm::Value*> Ops) {
13623  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13624  "Not a power-of-two sized vector!");
13625  bool AllConstants = true;
13626  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13627  AllConstants &= isa<Constant>(Ops[i]);
13628 
13629  // If this is a constant vector, create a ConstantVector.
13630  if (AllConstants) {
13631  SmallVector<llvm::Constant*, 16> CstOps;
13632  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13633  CstOps.push_back(cast<Constant>(Ops[i]));
13634  return llvm::ConstantVector::get(CstOps);
13635  }
13636 
13637  // Otherwise, insertelement the values to build the vector.
13638  Value *Result = llvm::PoisonValue::get(
13639  llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13640 
13641  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13642  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13643 
13644  return Result;
13645 }
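// Worked example (illustrative): BuildVector({i32 1, i32 2, i32 3, i32 4})
// folds directly to the constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>. If
// any element is non-constant, the vector is instead assembled with a chain
// of insertelement instructions starting from poison:
//
//   %v0 = insertelement <4 x i32> poison, i32 %a, i64 0
//   %v1 = insertelement <4 x i32> %v0, i32 %b, i64 1
//   ...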
13646 
13647 // Convert the mask from an integer type to a vector of i1.
13648 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13649  unsigned NumElts) {
13650 
13651  auto *MaskTy = llvm::FixedVectorType::get(
13652  CGF.Builder.getInt1Ty(),
13653  cast<IntegerType>(Mask->getType())->getBitWidth());
13654  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13655 
13656  // If we have fewer than 8 elements, then the starting mask was an i8 and
13657  // we need to extract down to the right number of elements.
13658  if (NumElts < 8) {
13659  int Indices[4];
13660  for (unsigned i = 0; i != NumElts; ++i)
13661  Indices[i] = i;
13662  MaskVec = CGF.Builder.CreateShuffleVector(
13663  MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13664  }
13665  return MaskVec;
13666 }
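// Worked example (illustrative): for an i8 mask driving a 4-element vector
// operation, getMaskVecValue bitcasts the full mask width first and then
// extracts the live lanes:
//
//   %m8 = bitcast i8 %mask to <8 x i1>
//   %m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>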
13667 
13668 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13669  Align Alignment) {
13670  Value *Ptr = CGF.Builder.CreateAddrSpaceCast(
13671  Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
13672 
13673  Value *MaskVec = getMaskVecValue(
13674  CGF, Ops[2],
13675  cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13676 
13677  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13678 }
13679 
13680 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13681  Align Alignment) {
13682  llvm::Type *Ty = Ops[1]->getType();
13683  Value *Ptr = CGF.Builder.CreateAddrSpaceCast(
13684  Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()));
13685 
13686  Value *MaskVec = getMaskVecValue(
13687  CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13688 
13689  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13690 }
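// A minimal sketch of how these two helpers surface in user code
// (illustrative; assumes AVX-512F and AVX-512VL are enabled):
// _mm256_mask_loadu_epi32 routes through EmitX86MaskedLoad and becomes an
// @llvm.masked.load whose passthrough operand is Ops[1].
//
//   #include <immintrin.h>
//   __m256i load_selected_lanes(__m256i src, __mmask8 k, const void *p) {
//     return _mm256_mask_loadu_epi32(src, k, p);
//   }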
13691 
13692 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13693  ArrayRef<Value *> Ops) {
13694  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13695  Value *Ptr = Ops[0];
13696 
13697  Value *MaskVec = getMaskVecValue(
13698  CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13699 
13700  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13701  ResultTy);
13702  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13703 }
13704 
13705 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13706  ArrayRef<Value *> Ops,
13707  bool IsCompress) {
13708  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13709 
13710  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13711 
13712  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13713  : Intrinsic::x86_avx512_mask_expand;
13714  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13715  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13716 }
13717 
13718 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13719  ArrayRef<Value *> Ops) {
13720  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13721  Value *Ptr = Ops[0];
13722 
13723  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13724 
13725  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13726  ResultTy);
13727  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13728 }
13729 
13730 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13731  ArrayRef<Value *> Ops,
13732  bool InvertLHS = false) {
13733  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13734  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13735  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13736 
13737  if (InvertLHS)
13738  LHS = CGF.Builder.CreateNot(LHS);
13739 
13740  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13741  Ops[0]->getType());
13742 }
13743 
13744 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13745  Value *Amt, bool IsRight) {
13746  llvm::Type *Ty = Op0->getType();
13747 
13748  // The amount may be a scalar immediate, in which case create a splat vector.
13749  // Funnel shift amounts are taken modulo the bit width, and the types are all
13750  // powers of 2, so we only care about the lowest log2(N) bits anyway.
13751  if (Amt->getType() != Ty) {
13752  unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13753  Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13754  Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13755  }
13756 
13757  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13758  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13759  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13760 }
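// Worked example (illustrative): a left funnel shift keeps the high bits of
// the concatenation Op0:Op1, so fshl(i8 0xAB, i8 0xCD, 4) == 0xBC. Rotates
// are the special case Op0 == Op1, which is how the rotate builtins can use
// this helper with a splatted scalar amount.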
13761 
13762 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13763  bool IsSigned) {
13764  Value *Op0 = Ops[0];
13765  Value *Op1 = Ops[1];
13766  llvm::Type *Ty = Op0->getType();
13767  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13768 
13769  CmpInst::Predicate Pred;
13770  switch (Imm) {
13771  case 0x0:
13772  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13773  break;
13774  case 0x1:
13775  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13776  break;
13777  case 0x2:
13778  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13779  break;
13780  case 0x3:
13781  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13782  break;
13783  case 0x4:
13784  Pred = ICmpInst::ICMP_EQ;
13785  break;
13786  case 0x5:
13787  Pred = ICmpInst::ICMP_NE;
13788  break;
13789  case 0x6:
13790  return llvm::Constant::getNullValue(Ty); // FALSE
13791  case 0x7:
13792  return llvm::Constant::getAllOnesValue(Ty); // TRUE
13793  default:
13794  llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13795  }
13796 
13797  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13798  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13799  return Res;
13800 }
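// For reference (illustrative): the XOP immediate values handled above map to
// LT (0), LE (1), GT (2), GE (3), EQ (4), NE (5), FALSE (6) and TRUE (7).
// E.g. _mm_comlt_epi32(a, b) carries immediate 0 and lowers to an icmp slt
// followed by a sign extension back to the vector type.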
13801 
13802 static Value *EmitX86Select(CodeGenFunction &CGF,
13803  Value *Mask, Value *Op0, Value *Op1) {
13804 
13805  // If the mask is all ones just return first argument.
13806  if (const auto *C = dyn_cast<Constant>(Mask))
13807  if (C->isAllOnesValue())
13808  return Op0;
13809 
13810  Mask = getMaskVecValue(
13811  CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13812 
13813  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13814 }
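// Illustrative IR for an 8-lane masked operation: the i8 mask becomes
// <8 x i1> via getMaskVecValue and then feeds a plain select:
//
//   %m = bitcast i8 %k to <8 x i1>
//   %r = select <8 x i1> %m, <8 x i32> %result, <8 x i32> %passthru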
13815 
13816 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13817  Value *Mask, Value *Op0, Value *Op1) {
13818  // If the mask is all ones just return first argument.
13819  if (const auto *C = dyn_cast<Constant>(Mask))
13820  if (C->isAllOnesValue())
13821  return Op0;
13822 
13823  auto *MaskTy = llvm::FixedVectorType::get(
13824  CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13825  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13826  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13827  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13828 }
13829 
13830 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13831  unsigned NumElts, Value *MaskIn) {
13832  if (MaskIn) {
13833  const auto *C = dyn_cast<Constant>(MaskIn);
13834  if (!C || !C->isAllOnesValue())
13835  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13836  }
13837 
13838  if (NumElts < 8) {
13839  int Indices[8];
13840  for (unsigned i = 0; i != NumElts; ++i)
13841  Indices[i] = i;
13842  for (unsigned i = NumElts; i != 8; ++i)
13843  Indices[i] = i % NumElts + NumElts;
13844  Cmp = CGF.Builder.CreateShuffleVector(
13845  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13846  }
13847 
13848  return CGF.Builder.CreateBitCast(Cmp,
13849  IntegerType::get(CGF.getLLVMContext(),
13850  std::max(NumElts, 8U)));
13851 }
13852 
13853 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13854  bool Signed, ArrayRef<Value *> Ops) {
13855  assert((Ops.size() == 2 || Ops.size() == 4) &&
13856  "Unexpected number of arguments");
13857  unsigned NumElts =
13858  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13859  Value *Cmp;
13860 
13861  if (CC == 3) {
13862  Cmp = Constant::getNullValue(
13863  llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13864  } else if (CC == 7) {
13865  Cmp = Constant::getAllOnesValue(
13866  llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13867  } else {
13868  ICmpInst::Predicate Pred;
13869  switch (CC) {
13870  default: llvm_unreachable("Unknown condition code");
13871  case 0: Pred = ICmpInst::ICMP_EQ; break;
13872  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13873  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13874  case 4: Pred = ICmpInst::ICMP_NE; break;
13875  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13876  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13877  }
13878  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13879  }
13880 
13881  Value *MaskIn = nullptr;
13882  if (Ops.size() == 4)
13883  MaskIn = Ops[3];
13884 
13885  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13886 }
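// For reference (illustrative): the CC operand follows the AVX-512 integer
// compare encoding: EQ (0), LT (1), LE (2), FALSE (3), NE (4), GE (5),
// GT (6), TRUE (7). E.g. _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT) carries
// CC = 1 and becomes an icmp slt whose <16 x i1> result is bitcast to the
// i16 mask by EmitX86MaskedCompareResult.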
13887 
13888 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13889  Value *Zero = Constant::getNullValue(In->getType());
13890  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13891 }
13892 
13893 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13894  ArrayRef<Value *> Ops, bool IsSigned) {
13895  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13896  llvm::Type *Ty = Ops[1]->getType();
13897 
13898  Value *Res;
13899  if (Rnd != 4) {
13900  Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13901  : Intrinsic::x86_avx512_uitofp_round;
13902  Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13903  Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13904  } else {
13905  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13906  Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13907  : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13908  }
13909 
13910  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13911 }
13912 
13913 // Lowers X86 FMA intrinsics to IR.
13914 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13915  ArrayRef<Value *> Ops, unsigned BuiltinID,
13916  bool IsAddSub) {
13917 
13918  bool Subtract = false;
13919  Intrinsic::ID IID = Intrinsic::not_intrinsic;
13920  switch (BuiltinID) {
13921  default: break;
13922  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13923  Subtract = true;
13924  [[fallthrough]];
13925  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13926  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13927  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13928  IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13929  break;
13930  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13931  Subtract = true;
13932  [[fallthrough]];
13933  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13934  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13935  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13936  IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13937  break;
13938  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13939  Subtract = true;
13940  [[fallthrough]];
13941  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13942  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13943  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13944  IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13945  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13946  Subtract = true;
13947  [[fallthrough]];
13948  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13949  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13950  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13951  IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13952  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13953  Subtract = true;
13954  [[fallthrough]];
13955  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13956  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13957  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13958  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13959  break;
13960  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13961  Subtract = true;
13962  [[fallthrough]];
13963  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13964  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13965  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13966  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13967  break;
13968  }
13969 
13970  Value *A = Ops[0];
13971  Value *B = Ops[1];
13972  Value *C = Ops[2];
13973 
13974  if (Subtract)
13975  C = CGF.Builder.CreateFNeg(C);
13976 
13977  Value *Res;
13978 
13979  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
13980  if (IID != Intrinsic::not_intrinsic &&
13981  (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13982  IsAddSub)) {
13983  Function *Intr = CGF.CGM.getIntrinsic(IID);
13984  Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13985  } else {
13986  llvm::Type *Ty = A->getType();
13987  Function *FMA;
13988  if (CGF.Builder.getIsFPConstrained()) {
13989  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13990  FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13991  Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13992  } else {
13993  FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13994  Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13995  }
13996  }
13997 
13998  // Handle any required masking.
13999  Value *MaskFalseVal = nullptr;
14000  switch (BuiltinID) {
14001  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14002  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14003  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14004  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14005  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14006  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14007  MaskFalseVal = Ops[0];
14008  break;
14009  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14010  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14011  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14012  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14013  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14014  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14015  MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14016  break;
14017  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14018  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14019  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14020  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14021  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14022  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14023  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14024  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14025  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14026  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14027  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14028  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14029  MaskFalseVal = Ops[2];
14030  break;
14031  }
14032 
14033  if (MaskFalseVal)
14034  return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14035 
14036  return Res;
14037 }
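// A hedged sketch of the mask/maskz flavors handled above (illustrative;
// assumes AVX-512F): with the default rounding mode both calls become a plain
// @llvm.fma followed by the select emitted via EmitX86Select.
//
//   #include <immintrin.h>
//   __m512d fma_mask(__m512d a, __mmask8 k, __m512d b, __m512d c) {
//     return _mm512_mask_fmadd_pd(a, k, b, c);  // false lanes keep a (Ops[0])
//   }
//   __m512d fma_maskz(__mmask8 k, __m512d a, __m512d b, __m512d c) {
//     return _mm512_maskz_fmadd_pd(k, a, b, c); // false lanes are zeroed
//   }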
14038 
14039 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14040  MutableArrayRef<Value *> Ops, Value *Upper,
14041  bool ZeroMask = false, unsigned PTIdx = 0,
14042  bool NegAcc = false) {
14043  unsigned Rnd = 4;
14044  if (Ops.size() > 4)
14045  Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14046 
14047  if (NegAcc)
14048  Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14049 
14050  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14051  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14052  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14053  Value *Res;
14054  if (Rnd != 4) {
14055  Intrinsic::ID IID;
14056 
14057  switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14058  case 16:
14059  IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14060  break;
14061  case 32:
14062  IID = Intrinsic::x86_avx512_vfmadd_f32;
14063  break;
14064  case 64:
14065  IID = Intrinsic::x86_avx512_vfmadd_f64;
14066  break;
14067  default:
14068  llvm_unreachable("Unexpected size");
14069  }
14070  Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14071  {Ops[0], Ops[1], Ops[2], Ops[4]});
14072  } else if (CGF.Builder.getIsFPConstrained()) {
14073  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14074  Function *FMA = CGF.CGM.getIntrinsic(
14075  Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14076  Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14077  } else {
14078  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14079  Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14080  }
14081  // If we have more than 3 arguments, we need to do masking.
14082  if (Ops.size() > 3) {
14083  Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14084  : Ops[PTIdx];
14085 
14086  // If we negated the accumulator and it is the PassThru value, we need to
14087  // bypass the negate. Conveniently, Upper should be the same thing in this
14088  // case.
14089  if (NegAcc && PTIdx == 2)
14090  PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14091 
14092  Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14093  }
14094  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14095 }
14096 
14097 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14098  ArrayRef<Value *> Ops) {
14099  llvm::Type *Ty = Ops[0]->getType();
14100  // Arguments have a vXi32 type so cast to vXi64.
14101  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14102  Ty->getPrimitiveSizeInBits() / 64);
14103  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14104  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14105 
14106  if (IsSigned) {
14107  // Shift left then arithmetic shift right.
14108  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14109  LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14110  LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14111  RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14112  RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14113  } else {
14114  // Clear the upper bits.
14115  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14116  LHS = CGF.Builder.CreateAnd(LHS, Mask);
14117  RHS = CGF.Builder.CreateAnd(RHS, Mask);
14118  }
14119 
14120  return CGF.Builder.CreateMul(LHS, RHS);
14121 }
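// Worked example (illustrative): for pmuldq each i64 lane is the product of
// the low (even-indexed) i32 halves of the sources; the shl/ashr pair above
// is exactly the sign extension of those halves:
//
//   %sa = shl <2 x i64> %a, <i64 32, i64 32>
//   %ea = ashr <2 x i64> %sa, <i64 32, i64 32>   ; sext of the low 32 bits
//   ... likewise for %b, then:
//   %r  = mul <2 x i64> %ea, %eb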
14122 
14123 // Emit a masked pternlog intrinsic. This only exists because the header has to
14124 // use a macro and we aren't able to pass the input argument to a pternlog
14125 // builtin and a select builtin without evaluating it twice.
14126 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14127  ArrayRef<Value *> Ops) {
14128  llvm::Type *Ty = Ops[0]->getType();
14129 
14130  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14131  unsigned EltWidth = Ty->getScalarSizeInBits();
14132  Intrinsic::ID IID;
14133  if (VecWidth == 128 && EltWidth == 32)
14134  IID = Intrinsic::x86_avx512_pternlog_d_128;
14135  else if (VecWidth == 256 && EltWidth == 32)
14136  IID = Intrinsic::x86_avx512_pternlog_d_256;
14137  else if (VecWidth == 512 && EltWidth == 32)
14138  IID = Intrinsic::x86_avx512_pternlog_d_512;
14139  else if (VecWidth == 128 && EltWidth == 64)
14140  IID = Intrinsic::x86_avx512_pternlog_q_128;
14141  else if (VecWidth == 256 && EltWidth == 64)
14142  IID = Intrinsic::x86_avx512_pternlog_q_256;
14143  else if (VecWidth == 512 && EltWidth == 64)
14144  IID = Intrinsic::x86_avx512_pternlog_q_512;
14145  else
14146  llvm_unreachable("Unexpected intrinsic");
14147 
14148  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14149  Ops.drop_back());
14150  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14151  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14152 }
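// For reference (illustrative): the pternlog immediate is an 8-entry truth
// table over the three source bits, so 0xE8 encodes the bitwise majority
// function (A & B) | (A & C) | (B & C), reachable from user code as e.g.
// _mm512_ternarylogic_epi32(a, b, c, 0xE8).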
14153 
14154 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14155  llvm::Type *DstTy) {
14156  unsigned NumberOfElements =
14157  cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14158  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14159  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14160 }
14161 
14162 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14163  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14164  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14165  return EmitX86CpuIs(CPUStr);
14166 }
14167 
14168 // Convert F16 halves to floats.
14169 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14170  ArrayRef<Value *> Ops,
14171  llvm::Type *DstTy) {
14172  assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14173  "Unknown cvtph2ps intrinsic");
14174 
14175  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14176  if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14177  Function *F =
14178  CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14179  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14180  }
14181 
14182  unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14183  Value *Src = Ops[0];
14184 
14185  // Extract the subvector.
14186  if (NumDstElts !=
14187  cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14188  assert(NumDstElts == 4 && "Unexpected vector size");
14189  Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14190  }
14191 
14192  // Bitcast from vXi16 to vXf16.
14193  auto *HalfTy = llvm::FixedVectorType::get(
14194  llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14195  Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14196 
14197  // Perform the fp-extension.
14198  Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14199 
14200  if (Ops.size() >= 3)
14201  Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14202  return Res;
14203 }
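// Illustrative IR for the 4-element form: the vXi16 source is narrowed and
// reinterpreted as half precision before a plain fpext:
//
//   %sub  = shufflevector <8 x i16> %src, <8 x i16> poison,
//                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %half = bitcast <4 x i16> %sub to <4 x half>
//   %res  = fpext <4 x half> %half to <4 x float>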
14204 
14205 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14206 
14207  llvm::Type *Int32Ty = Builder.getInt32Ty();
14208 
14209  // Matching the struct layout from the compiler-rt/libgcc structure that is
14210  // filled in:
14211  // unsigned int __cpu_vendor;
14212  // unsigned int __cpu_type;
14213  // unsigned int __cpu_subtype;
14214  // unsigned int __cpu_features[1];
14215  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14216  llvm::ArrayType::get(Int32Ty, 1));
14217 
14218  // Grab the global __cpu_model.
14219  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14220  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14221 
14222  // Calculate the index needed to access the correct field based on the
14223  // range. Also adjust the expected value.
14224  unsigned Index;
14225  unsigned Value;
14226  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14227 #define X86_VENDOR(ENUM, STRING) \
14228  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14229 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14230  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14231 #define X86_CPU_TYPE(ENUM, STR) \
14232  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14233 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14234  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14235 #define X86_CPU_SUBTYPE(ENUM, STR) \
14236  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14237 #include "llvm/TargetParser/X86TargetParser.def"
14238  .Default({0, 0});
14239  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14240 
14241  // Grab the appropriate field from __cpu_model.
14242  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14243  ConstantInt::get(Int32Ty, Index)};
14244  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
14245  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14246  CharUnits::fromQuantity(4));
14247 
14248  // Check the value of the field against the requested value.
14249  return Builder.CreateICmpEQ(CpuValue,
14250  llvm::ConstantInt::get(Int32Ty, Value));
14251 }
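// A minimal usage sketch (illustrative): __builtin_cpu_is compares one field
// of the runtime __cpu_model structure against a constant taken from
// X86TargetParser.def.
//
//   int is_znver3(void) {
//     return __builtin_cpu_is("znver3"); // load of __cpu_subtype + icmp eq
//   }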
14252 
14253 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14254  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14255  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14256  if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14257  return Builder.getFalse();
14258  return EmitX86CpuSupports(FeatureStr);
14259 }
14260 
14261 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14262  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14263 }
14264 
14265 llvm::Value *
14266 CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14267  Value *Result = Builder.getTrue();
14268  if (FeatureMask[0] != 0) {
14269  // Matching the struct layout from the compiler-rt/libgcc structure that is
14270  // filled in:
14271  // unsigned int __cpu_vendor;
14272  // unsigned int __cpu_type;
14273  // unsigned int __cpu_subtype;
14274  // unsigned int __cpu_features[1];
14275  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14276  llvm::ArrayType::get(Int32Ty, 1));
14277 
14278  // Grab the global __cpu_model.
14279  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14280  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14281 
14282  // Grab the first (0th) element from the field __cpu_features off of the
14283  // global in the struct STy.
14284  Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14285  Builder.getInt32(0)};
14286  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
14287  Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14288  CharUnits::fromQuantity(4));
14289 
14290  // Check the value of the bit corresponding to the feature requested.
14291  Value *Mask = Builder.getInt32(FeatureMask[0]);
14292  Value *Bitset = Builder.CreateAnd(Features, Mask);
14293  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14294  Result = Builder.CreateAnd(Result, Cmp);
14295  }
14296 
14297  llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14298  llvm::Constant *CpuFeatures2 =
14299  CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14300  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14301  for (int i = 1; i != 4; ++i) {
14302  const uint32_t M = FeatureMask[i];
14303  if (!M)
14304  continue;
14305  Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14306  Value *Features = Builder.CreateAlignedLoad(
14307  Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
14308  CharUnits::fromQuantity(4));
14309  // Check the value of the bit corresponding to the feature requested.
14310  Value *Mask = Builder.getInt32(M);
14311  Value *Bitset = Builder.CreateAnd(Features, Mask);
14312  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14313  Result = Builder.CreateAnd(Result, Cmp);
14314  }
14315 
14316  return Result;
14317 }
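// Usage sketch (illustrative): feature bits beyond the first 32 live in the
// separate __cpu_features2 array probed by the loop above.
//
//   int have_avx512vl(void) {
//     return __builtin_cpu_supports("avx512vl");
//   }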
14318 
14319 Value *CodeGenFunction::EmitAArch64CpuInit() {
14320  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14321  llvm::FunctionCallee Func =
14322  CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14323  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14324  cast<llvm::GlobalValue>(Func.getCallee())
14325  ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14326  return Builder.CreateCall(Func);
14327 }
14328 
14329 Value *CodeGenFunction::EmitX86CpuInit() {
14330  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14331  /*Variadic*/ false);
14332  llvm::FunctionCallee Func =
14333  CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14334  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14335  cast<llvm::GlobalValue>(Func.getCallee())
14336  ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14337  return Builder.CreateCall(Func);
14338 }
14339 
14340 Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14341  const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14342  StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14343  llvm::SmallVector<StringRef, 8> Features;
14344  ArgStr.split(Features, "+");
14345  for (auto &Feature : Features) {
14346  Feature = Feature.trim();
14347  if (!llvm::AArch64::parseArchExtension(Feature))
14348  return Builder.getFalse();
14349  if (Feature != "default")
14350  Features.push_back(Feature);
14351  }
14352  return EmitAArch64CpuSupports(Features);
14353 }
14354 
14355 llvm::Value *
14356 CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14357  uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14358  Value *Result = Builder.getTrue();
14359  if (FeaturesMask != 0) {
14360  // Get features from structure in runtime library
14361  // struct {
14362  // unsigned long long features;
14363  // } __aarch64_cpu_features;
14364  llvm::Type *STy = llvm::StructType::get(Int64Ty);
14365  llvm::Constant *AArch64CPUFeatures =
14366  CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14367  cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14368  llvm::Value *CpuFeatures = Builder.CreateGEP(
14369  STy, AArch64CPUFeatures,
14370  {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14371  Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14372  CharUnits::fromQuantity(8));
14373  Value *Mask = Builder.getInt64(FeaturesMask);
14374  Value *Bitset = Builder.CreateAnd(Features, Mask);
14375  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14376  Result = Builder.CreateAnd(Result, Cmp);
14377  }
14378  return Result;
14379 }
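// Usage sketch (illustrative; assumes a target where AArch64 function
// multi-versioning support is available): the probe is a single 64-bit load
// from __aarch64_cpu_features masked against the requested feature bits.
//
//   int have_sve2(void) {
//     return __builtin_cpu_supports("sve2");
//   }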
14380 
14381 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14382  const CallExpr *E) {
14383  if (BuiltinID == Builtin::BI__builtin_cpu_is)
14384  return EmitX86CpuIs(E);
14385  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14386  return EmitX86CpuSupports(E);
14387  if (BuiltinID == Builtin::BI__builtin_cpu_init)
14388  return EmitX86CpuInit();
14389 
14390  // Handle MSVC intrinsics before argument evaluation to prevent double
14391  // evaluation.
14392  if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14393  return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14394 
14396  bool IsMaskFCmp = false;
14397  bool IsConjFMA = false;
14398 
14399  // Find out if any arguments are required to be integer constant expressions.
14400  unsigned ICEArguments = 0;
14401  ASTContext::GetBuiltinTypeError Error;
14402  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14403  assert(Error == ASTContext::GE_None && "Should not codegen an error");
14404 
14405  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14406  Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14407  }
14408 
14409  // These exist so that the builtin that takes an immediate can be bounds
14410  // checked by clang to avoid passing bad immediates to the backend. Since
14411  // AVX has a larger immediate than SSE we would need separate builtins to
14412  // do the different bounds checking. Rather than create a clang-specific
14413  // SSE-only builtin, this implements eight separate builtins to match the
14414  // gcc implementation.
14415  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14416  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14417  llvm::Function *F = CGM.getIntrinsic(ID);
14418  return Builder.CreateCall(F, Ops);
14419  };
14420 
14421  // For the vector forms of FP comparisons, translate the builtins directly to
14422  // IR.
14423  // TODO: The builtins could be removed if the SSE header files used vector
14424  // extension comparisons directly (vector ordered/unordered may need
14425  // additional support via __builtin_isnan()).
14426  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14427  bool IsSignaling) {
14428  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14429  Value *Cmp;
14430  if (IsSignaling)
14431  Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14432  else
14433  Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14434  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14435  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14436  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14437  return Builder.CreateBitCast(Sext, FPVecTy);
14438  };
14439 
14440  switch (BuiltinID) {
14441  default: return nullptr;
14442  case X86::BI_mm_prefetch: {
14443  Value *Address = Ops[0];
14444  ConstantInt *C = cast<ConstantInt>(Ops[1]);
14445  Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14446  Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14447  Value *Data = ConstantInt::get(Int32Ty, 1);
14448  Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14449  return Builder.CreateCall(F, {Address, RW, Locality, Data});
14450  }
14451  case X86::BI_mm_clflush: {
14452  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14453  Ops[0]);
14454  }
14455  case X86::BI_mm_lfence: {
14456  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14457  }
14458  case X86::BI_mm_mfence: {
14459  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14460  }
14461  case X86::BI_mm_sfence: {
14462  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14463  }
14464  case X86::BI_mm_pause: {
14465  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14466  }
14467  case X86::BI__rdtsc: {
14468  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14469  }
14470  case X86::BI__builtin_ia32_rdtscp: {
14471  Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14472  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14473  Ops[0]);
14474  return Builder.CreateExtractValue(Call, 0);
14475  }
14476  case X86::BI__builtin_ia32_lzcnt_u16:
14477  case X86::BI__builtin_ia32_lzcnt_u32:
14478  case X86::BI__builtin_ia32_lzcnt_u64: {
14479  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14480  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14481  }
14482  case X86::BI__builtin_ia32_tzcnt_u16:
14483  case X86::BI__builtin_ia32_tzcnt_u32:
14484  case X86::BI__builtin_ia32_tzcnt_u64: {
14485  Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14486  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14487  }
14488  case X86::BI__builtin_ia32_undef128:
14489  case X86::BI__builtin_ia32_undef256:
14490  case X86::BI__builtin_ia32_undef512:
14491  // The x86 definition of "undef" is not the same as the LLVM definition
14492  // (PR32176). We leave optimizing away an unnecessary zero constant to the
14493  // IR optimizer and backend.
14494  // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14495  // value, we should use that here instead of a zero.
14496  return llvm::Constant::getNullValue(ConvertType(E->getType()));
14497  case X86::BI__builtin_ia32_vec_init_v8qi:
14498  case X86::BI__builtin_ia32_vec_init_v4hi:
14499  case X86::BI__builtin_ia32_vec_init_v2si:
14500  return Builder.CreateBitCast(BuildVector(Ops),
14501  llvm::Type::getX86_MMXTy(getLLVMContext()));
14502  case X86::BI__builtin_ia32_vec_ext_v2si:
14503  case X86::BI__builtin_ia32_vec_ext_v16qi:
14504  case X86::BI__builtin_ia32_vec_ext_v8hi:
14505  case X86::BI__builtin_ia32_vec_ext_v4si:
14506  case X86::BI__builtin_ia32_vec_ext_v4sf:
14507  case X86::BI__builtin_ia32_vec_ext_v2di:
14508  case X86::BI__builtin_ia32_vec_ext_v32qi:
14509  case X86::BI__builtin_ia32_vec_ext_v16hi:
14510  case X86::BI__builtin_ia32_vec_ext_v8si:
14511  case X86::BI__builtin_ia32_vec_ext_v4di: {
14512  unsigned NumElts =
14513  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14514  uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14515  Index &= NumElts - 1;
14516  // These builtins exist so we can ensure the index is an ICE and in range.
14517  // Otherwise we could just do this in the header file.
14518  return Builder.CreateExtractElement(Ops[0], Index);
14519  }
14520  case X86::BI__builtin_ia32_vec_set_v16qi:
14521  case X86::BI__builtin_ia32_vec_set_v8hi:
14522  case X86::BI__builtin_ia32_vec_set_v4si:
14523  case X86::BI__builtin_ia32_vec_set_v2di:
14524  case X86::BI__builtin_ia32_vec_set_v32qi:
14525  case X86::BI__builtin_ia32_vec_set_v16hi:
14526  case X86::BI__builtin_ia32_vec_set_v8si:
14527  case X86::BI__builtin_ia32_vec_set_v4di: {
14528  unsigned NumElts =
14529  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14530  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14531  Index &= NumElts - 1;
14532  // These builtins exist so we can ensure the index is an ICE and in range.
14533  // Otherwise we could just do this in the header file.
14534  return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14535  }
14536  case X86::BI_mm_setcsr:
14537  case X86::BI__builtin_ia32_ldmxcsr: {
14538  RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14539  Builder.CreateStore(Ops[0], Tmp);
14540  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14541  Tmp.getPointer());
14542  }
14543  case X86::BI_mm_getcsr:
14544  case X86::BI__builtin_ia32_stmxcsr: {
14545  RawAddress Tmp = CreateMemTemp(E->getType());
14546  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14547  Tmp.getPointer());
14548  return Builder.CreateLoad(Tmp, "stmxcsr");
14549  }
14550  case X86::BI__builtin_ia32_xsave:
14551  case X86::BI__builtin_ia32_xsave64:
14552  case X86::BI__builtin_ia32_xrstor:
14553  case X86::BI__builtin_ia32_xrstor64:
14554  case X86::BI__builtin_ia32_xsaveopt:
14555  case X86::BI__builtin_ia32_xsaveopt64:
14556  case X86::BI__builtin_ia32_xrstors:
14557  case X86::BI__builtin_ia32_xrstors64:
14558  case X86::BI__builtin_ia32_xsavec:
14559  case X86::BI__builtin_ia32_xsavec64:
14560  case X86::BI__builtin_ia32_xsaves:
14561  case X86::BI__builtin_ia32_xsaves64:
14562  case X86::BI__builtin_ia32_xsetbv:
14563  case X86::BI_xsetbv: {
14564  Intrinsic::ID ID;
14565 #define INTRINSIC_X86_XSAVE_ID(NAME) \
14566  case X86::BI__builtin_ia32_##NAME: \
14567  ID = Intrinsic::x86_##NAME; \
14568  break
14569  switch (BuiltinID) {
14570  default: llvm_unreachable("Unsupported intrinsic!");
14571  INTRINSIC_X86_XSAVE_ID(xsave);
14572  INTRINSIC_X86_XSAVE_ID(xsave64);
14573  INTRINSIC_X86_XSAVE_ID(xrstor);
14574  INTRINSIC_X86_XSAVE_ID(xrstor64);
14575  INTRINSIC_X86_XSAVE_ID(xsaveopt);
14576  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14577  INTRINSIC_X86_XSAVE_ID(xrstors);
14578  INTRINSIC_X86_XSAVE_ID(xrstors64);
14579  INTRINSIC_X86_XSAVE_ID(xsavec);
14580  INTRINSIC_X86_XSAVE_ID(xsavec64);
14581  INTRINSIC_X86_XSAVE_ID(xsaves);
14582  INTRINSIC_X86_XSAVE_ID(xsaves64);
14583  INTRINSIC_X86_XSAVE_ID(xsetbv);
14584  case X86::BI_xsetbv:
14585  ID = Intrinsic::x86_xsetbv;
14586  break;
14587  }
14588 #undef INTRINSIC_X86_XSAVE_ID
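  // The underlying instructions take the 64-bit feature mask in EDX:EAX, so
  // split Ops[1] into its high and low i32 halves; the IR intrinsics expect
  // the two halves as separate operands.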
14589  Value *Mhi = Builder.CreateTrunc(
14590  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14591  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14592  Ops[1] = Mhi;
14593  Ops.push_back(Mlo);
14594  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14595  }
14596  case X86::BI__builtin_ia32_xgetbv:
14597  case X86::BI_xgetbv:
14598  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14599  case X86::BI__builtin_ia32_storedqudi128_mask:
14600  case X86::BI__builtin_ia32_storedqusi128_mask:
14601  case X86::BI__builtin_ia32_storedquhi128_mask:
14602  case X86::BI__builtin_ia32_storedquqi128_mask:
14603  case X86::BI__builtin_ia32_storeupd128_mask:
14604  case X86::BI__builtin_ia32_storeups128_mask:
14605  case X86::BI__builtin_ia32_storedqudi256_mask:
14606  case X86::BI__builtin_ia32_storedqusi256_mask:
14607  case X86::BI__builtin_ia32_storedquhi256_mask:
14608  case X86::BI__builtin_ia32_storedquqi256_mask:
14609  case X86::BI__builtin_ia32_storeupd256_mask:
14610  case X86::BI__builtin_ia32_storeups256_mask:
14611  case X86::BI__builtin_ia32_storedqudi512_mask:
14612  case X86::BI__builtin_ia32_storedqusi512_mask:
14613  case X86::BI__builtin_ia32_storedquhi512_mask:
14614  case X86::BI__builtin_ia32_storedquqi512_mask:
14615  case X86::BI__builtin_ia32_storeupd512_mask:
14616  case X86::BI__builtin_ia32_storeups512_mask:
14617  return EmitX86MaskedStore(*this, Ops, Align(1));
14618 
14619  case X86::BI__builtin_ia32_storesh128_mask:
14620  case X86::BI__builtin_ia32_storess128_mask:
14621  case X86::BI__builtin_ia32_storesd128_mask:
14622  return EmitX86MaskedStore(*this, Ops, Align(1));
14623 
14624  case X86::BI__builtin_ia32_vpopcntb_128:
14625  case X86::BI__builtin_ia32_vpopcntd_128:
14626  case X86::BI__builtin_ia32_vpopcntq_128:
14627  case X86::BI__builtin_ia32_vpopcntw_128:
14628  case X86::BI__builtin_ia32_vpopcntb_256:
14629  case X86::BI__builtin_ia32_vpopcntd_256:
14630  case X86::BI__builtin_ia32_vpopcntq_256:
14631  case X86::BI__builtin_ia32_vpopcntw_256:
14632  case X86::BI__builtin_ia32_vpopcntb_512:
14633  case X86::BI__builtin_ia32_vpopcntd_512:
14634  case X86::BI__builtin_ia32_vpopcntq_512:
14635  case X86::BI__builtin_ia32_vpopcntw_512: {
14636  llvm::Type *ResultType = ConvertType(E->getType());
14637  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14638  return Builder.CreateCall(F, Ops);
14639  }
14640  case X86::BI__builtin_ia32_cvtmask2b128:
14641  case X86::BI__builtin_ia32_cvtmask2b256:
14642  case X86::BI__builtin_ia32_cvtmask2b512:
14643  case X86::BI__builtin_ia32_cvtmask2w128:
14644  case X86::BI__builtin_ia32_cvtmask2w256:
14645  case X86::BI__builtin_ia32_cvtmask2w512:
14646  case X86::BI__builtin_ia32_cvtmask2d128:
14647  case X86::BI__builtin_ia32_cvtmask2d256:
14648  case X86::BI__builtin_ia32_cvtmask2d512:
14649  case X86::BI__builtin_ia32_cvtmask2q128:
14650  case X86::BI__builtin_ia32_cvtmask2q256:
14651  case X86::BI__builtin_ia32_cvtmask2q512:
14652  return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14653 
14654  case X86::BI__builtin_ia32_cvtb2mask128:
14655  case X86::BI__builtin_ia32_cvtb2mask256:
14656  case X86::BI__builtin_ia32_cvtb2mask512:
14657  case X86::BI__builtin_ia32_cvtw2mask128:
14658  case X86::BI__builtin_ia32_cvtw2mask256:
14659  case X86::BI__builtin_ia32_cvtw2mask512:
14660  case X86::BI__builtin_ia32_cvtd2mask128:
14661  case X86::BI__builtin_ia32_cvtd2mask256:
14662  case X86::BI__builtin_ia32_cvtd2mask512:
14663  case X86::BI__builtin_ia32_cvtq2mask128:
14664  case X86::BI__builtin_ia32_cvtq2mask256:
14665  case X86::BI__builtin_ia32_cvtq2mask512:
14666  return EmitX86ConvertToMask(*this, Ops[0]);
14667 
14668  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14669  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14670  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14671  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14672  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14673  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14674  return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14675  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14676  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14677  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14678  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14679  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14680  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14681  return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14682 
14683  case X86::BI__builtin_ia32_vfmaddss3:
14684  case X86::BI__builtin_ia32_vfmaddsd3:
14685  case X86::BI__builtin_ia32_vfmaddsh3_mask:
14686  case X86::BI__builtin_ia32_vfmaddss3_mask:
14687  case X86::BI__builtin_ia32_vfmaddsd3_mask:
14688  return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14689  case X86::BI__builtin_ia32_vfmaddss:
14690  case X86::BI__builtin_ia32_vfmaddsd:
14691  return EmitScalarFMAExpr(*this, E, Ops,
14692  Constant::getNullValue(Ops[0]->getType()));
14693  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14694  case X86::BI__builtin_ia32_vfmaddss3_maskz:
14695  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14696  return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14697  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14698  case X86::BI__builtin_ia32_vfmaddss3_mask3:
14699  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14700  return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14701  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14702  case X86::BI__builtin_ia32_vfmsubss3_mask3:
14703  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14704  return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14705  /*NegAcc*/ true);
14706  case X86::BI__builtin_ia32_vfmaddph:
14707  case X86::BI__builtin_ia32_vfmaddps:
14708  case X86::BI__builtin_ia32_vfmaddpd:
14709  case X86::BI__builtin_ia32_vfmaddph256:
14710  case X86::BI__builtin_ia32_vfmaddps256:
14711  case X86::BI__builtin_ia32_vfmaddpd256:
14712  case X86::BI__builtin_ia32_vfmaddph512_mask:
14713  case X86::BI__builtin_ia32_vfmaddph512_maskz:
14714  case X86::BI__builtin_ia32_vfmaddph512_mask3:
14715  case X86::BI__builtin_ia32_vfmaddps512_mask:
14716  case X86::BI__builtin_ia32_vfmaddps512_maskz:
14717  case X86::BI__builtin_ia32_vfmaddps512_mask3:
14718  case X86::BI__builtin_ia32_vfmsubps512_mask3:
14719  case X86::BI__builtin_ia32_vfmaddpd512_mask:
14720  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14721  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14722  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14723  case X86::BI__builtin_ia32_vfmsubph512_mask3:
14724  return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14725  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14726  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14727  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14728  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14729  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14730  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14731  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14732  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14733  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14734  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14735  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14736  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14737  return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14738 
14739  case X86::BI__builtin_ia32_movdqa32store128_mask:
14740  case X86::BI__builtin_ia32_movdqa64store128_mask:
14741  case X86::BI__builtin_ia32_storeaps128_mask:
14742  case X86::BI__builtin_ia32_storeapd128_mask:
14743  case X86::BI__builtin_ia32_movdqa32store256_mask:
14744  case X86::BI__builtin_ia32_movdqa64store256_mask:
14745  case X86::BI__builtin_ia32_storeaps256_mask:
14746  case X86::BI__builtin_ia32_storeapd256_mask:
14747  case X86::BI__builtin_ia32_movdqa32store512_mask:
14748  case X86::BI__builtin_ia32_movdqa64store512_mask:
14749  case X86::BI__builtin_ia32_storeaps512_mask:
14750  case X86::BI__builtin_ia32_storeapd512_mask:
14751  return EmitX86MaskedStore(
14752  *this, Ops,
14753  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14754 
14755  case X86::BI__builtin_ia32_loadups128_mask:
14756  case X86::BI__builtin_ia32_loadups256_mask:
14757  case X86::BI__builtin_ia32_loadups512_mask:
14758  case X86::BI__builtin_ia32_loadupd128_mask:
14759  case X86::BI__builtin_ia32_loadupd256_mask:
14760  case X86::BI__builtin_ia32_loadupd512_mask:
14761  case X86::BI__builtin_ia32_loaddquqi128_mask:
14762  case X86::BI__builtin_ia32_loaddquqi256_mask:
14763  case X86::BI__builtin_ia32_loaddquqi512_mask:
14764  case X86::BI__builtin_ia32_loaddquhi128_mask:
14765  case X86::BI__builtin_ia32_loaddquhi256_mask:
14766  case X86::BI__builtin_ia32_loaddquhi512_mask:
14767  case X86::BI__builtin_ia32_loaddqusi128_mask:
14768  case X86::BI__builtin_ia32_loaddqusi256_mask:
14769  case X86::BI__builtin_ia32_loaddqusi512_mask:
14770  case X86::BI__builtin_ia32_loaddqudi128_mask:
14771  case X86::BI__builtin_ia32_loaddqudi256_mask:
14772  case X86::BI__builtin_ia32_loaddqudi512_mask:
14773  return EmitX86MaskedLoad(*this, Ops, Align(1));
14774 
14775  case X86::BI__builtin_ia32_loadsh128_mask:
14776  case X86::BI__builtin_ia32_loadss128_mask:
14777  case X86::BI__builtin_ia32_loadsd128_mask:
14778  return EmitX86MaskedLoad(*this, Ops, Align(1));
14779 
14780  case X86::BI__builtin_ia32_loadaps128_mask:
14781  case X86::BI__builtin_ia32_loadaps256_mask:
14782  case X86::BI__builtin_ia32_loadaps512_mask:
14783  case X86::BI__builtin_ia32_loadapd128_mask:
14784  case X86::BI__builtin_ia32_loadapd256_mask:
14785  case X86::BI__builtin_ia32_loadapd512_mask:
14786  case X86::BI__builtin_ia32_movdqa32load128_mask:
14787  case X86::BI__builtin_ia32_movdqa32load256_mask:
14788  case X86::BI__builtin_ia32_movdqa32load512_mask:
14789  case X86::BI__builtin_ia32_movdqa64load128_mask:
14790  case X86::BI__builtin_ia32_movdqa64load256_mask:
14791  case X86::BI__builtin_ia32_movdqa64load512_mask:
14792  return EmitX86MaskedLoad(
14793  *this, Ops,
14794  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14795 
14796  case X86::BI__builtin_ia32_expandloaddf128_mask:
14797  case X86::BI__builtin_ia32_expandloaddf256_mask:
14798  case X86::BI__builtin_ia32_expandloaddf512_mask:
14799  case X86::BI__builtin_ia32_expandloadsf128_mask:
14800  case X86::BI__builtin_ia32_expandloadsf256_mask:
14801  case X86::BI__builtin_ia32_expandloadsf512_mask:
14802  case X86::BI__builtin_ia32_expandloaddi128_mask:
14803  case X86::BI__builtin_ia32_expandloaddi256_mask:
14804  case X86::BI__builtin_ia32_expandloaddi512_mask:
14805  case X86::BI__builtin_ia32_expandloadsi128_mask:
14806  case X86::BI__builtin_ia32_expandloadsi256_mask:
14807  case X86::BI__builtin_ia32_expandloadsi512_mask:
14808  case X86::BI__builtin_ia32_expandloadhi128_mask:
14809  case X86::BI__builtin_ia32_expandloadhi256_mask:
14810  case X86::BI__builtin_ia32_expandloadhi512_mask:
14811  case X86::BI__builtin_ia32_expandloadqi128_mask:
14812  case X86::BI__builtin_ia32_expandloadqi256_mask:
14813  case X86::BI__builtin_ia32_expandloadqi512_mask:
14814  return EmitX86ExpandLoad(*this, Ops);
14815 
14816  case X86::BI__builtin_ia32_compressstoredf128_mask:
14817  case X86::BI__builtin_ia32_compressstoredf256_mask:
14818  case X86::BI__builtin_ia32_compressstoredf512_mask:
14819  case X86::BI__builtin_ia32_compressstoresf128_mask:
14820  case X86::BI__builtin_ia32_compressstoresf256_mask:
14821  case X86::BI__builtin_ia32_compressstoresf512_mask:
14822  case X86::BI__builtin_ia32_compressstoredi128_mask:
14823  case X86::BI__builtin_ia32_compressstoredi256_mask:
14824  case X86::BI__builtin_ia32_compressstoredi512_mask:
14825  case X86::BI__builtin_ia32_compressstoresi128_mask:
14826  case X86::BI__builtin_ia32_compressstoresi256_mask:
14827  case X86::BI__builtin_ia32_compressstoresi512_mask:
14828  case X86::BI__builtin_ia32_compressstorehi128_mask:
14829  case X86::BI__builtin_ia32_compressstorehi256_mask:
14830  case X86::BI__builtin_ia32_compressstorehi512_mask:
14831  case X86::BI__builtin_ia32_compressstoreqi128_mask:
14832  case X86::BI__builtin_ia32_compressstoreqi256_mask:
14833  case X86::BI__builtin_ia32_compressstoreqi512_mask:
14834  return EmitX86CompressStore(*this, Ops);
14835 
14836  case X86::BI__builtin_ia32_expanddf128_mask:
14837  case X86::BI__builtin_ia32_expanddf256_mask:
14838  case X86::BI__builtin_ia32_expanddf512_mask:
14839  case X86::BI__builtin_ia32_expandsf128_mask:
14840  case X86::BI__builtin_ia32_expandsf256_mask:
14841  case X86::BI__builtin_ia32_expandsf512_mask:
14842  case X86::BI__builtin_ia32_expanddi128_mask:
14843  case X86::BI__builtin_ia32_expanddi256_mask:
14844  case X86::BI__builtin_ia32_expanddi512_mask:
14845  case X86::BI__builtin_ia32_expandsi128_mask:
14846  case X86::BI__builtin_ia32_expandsi256_mask:
14847  case X86::BI__builtin_ia32_expandsi512_mask:
14848  case X86::BI__builtin_ia32_expandhi128_mask:
14849  case X86::BI__builtin_ia32_expandhi256_mask:
14850  case X86::BI__builtin_ia32_expandhi512_mask:
14851  case X86::BI__builtin_ia32_expandqi128_mask:
14852  case X86::BI__builtin_ia32_expandqi256_mask:
14853  case X86::BI__builtin_ia32_expandqi512_mask:
14854  return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14855 
14856  case X86::BI__builtin_ia32_compressdf128_mask:
14857  case X86::BI__builtin_ia32_compressdf256_mask:
14858  case X86::BI__builtin_ia32_compressdf512_mask:
14859  case X86::BI__builtin_ia32_compresssf128_mask:
14860  case X86::BI__builtin_ia32_compresssf256_mask:
14861  case X86::BI__builtin_ia32_compresssf512_mask:
14862  case X86::BI__builtin_ia32_compressdi128_mask:
14863  case X86::BI__builtin_ia32_compressdi256_mask:
14864  case X86::BI__builtin_ia32_compressdi512_mask:
14865  case X86::BI__builtin_ia32_compresssi128_mask:
14866  case X86::BI__builtin_ia32_compresssi256_mask:
14867  case X86::BI__builtin_ia32_compresssi512_mask:
14868  case X86::BI__builtin_ia32_compresshi128_mask:
14869  case X86::BI__builtin_ia32_compresshi256_mask:
14870  case X86::BI__builtin_ia32_compresshi512_mask:
14871  case X86::BI__builtin_ia32_compressqi128_mask:
14872  case X86::BI__builtin_ia32_compressqi256_mask:
14873  case X86::BI__builtin_ia32_compressqi512_mask:
14874  return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14875 
14876  case X86::BI__builtin_ia32_gather3div2df:
14877  case X86::BI__builtin_ia32_gather3div2di:
14878  case X86::BI__builtin_ia32_gather3div4df:
14879  case X86::BI__builtin_ia32_gather3div4di:
14880  case X86::BI__builtin_ia32_gather3div4sf:
14881  case X86::BI__builtin_ia32_gather3div4si:
14882  case X86::BI__builtin_ia32_gather3div8sf:
14883  case X86::BI__builtin_ia32_gather3div8si:
14884  case X86::BI__builtin_ia32_gather3siv2df:
14885  case X86::BI__builtin_ia32_gather3siv2di:
14886  case X86::BI__builtin_ia32_gather3siv4df:
14887  case X86::BI__builtin_ia32_gather3siv4di:
14888  case X86::BI__builtin_ia32_gather3siv4sf:
14889  case X86::BI__builtin_ia32_gather3siv4si:
14890  case X86::BI__builtin_ia32_gather3siv8sf:
14891  case X86::BI__builtin_ia32_gather3siv8si:
14892  case X86::BI__builtin_ia32_gathersiv8df:
14893  case X86::BI__builtin_ia32_gathersiv16sf:
14894  case X86::BI__builtin_ia32_gatherdiv8df:
14895  case X86::BI__builtin_ia32_gatherdiv16sf:
14896  case X86::BI__builtin_ia32_gathersiv8di:
14897  case X86::BI__builtin_ia32_gathersiv16si:
14898  case X86::BI__builtin_ia32_gatherdiv8di:
14899  case X86::BI__builtin_ia32_gatherdiv16si: {
14900  Intrinsic::ID IID;
14901  switch (BuiltinID) {
14902  default: llvm_unreachable("Unexpected builtin");
14903  case X86::BI__builtin_ia32_gather3div2df:
14904  IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14905  break;
14906  case X86::BI__builtin_ia32_gather3div2di:
14907  IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14908  break;
14909  case X86::BI__builtin_ia32_gather3div4df:
14910  IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14911  break;
14912  case X86::BI__builtin_ia32_gather3div4di:
14913  IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14914  break;
14915  case X86::BI__builtin_ia32_gather3div4sf:
14916  IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14917  break;
14918  case X86::BI__builtin_ia32_gather3div4si:
14919  IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14920  break;
14921  case X86::BI__builtin_ia32_gather3div8sf:
14922  IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14923  break;
14924  case X86::BI__builtin_ia32_gather3div8si:
14925  IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14926  break;
14927  case X86::BI__builtin_ia32_gather3siv2df:
14928  IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14929  break;
14930  case X86::BI__builtin_ia32_gather3siv2di:
14931  IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14932  break;
14933  case X86::BI__builtin_ia32_gather3siv4df:
14934  IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14935  break;
14936  case X86::BI__builtin_ia32_gather3siv4di:
14937  IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14938  break;
14939  case X86::BI__builtin_ia32_gather3siv4sf:
14940  IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14941  break;
14942  case X86::BI__builtin_ia32_gather3siv4si:
14943  IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14944  break;
14945  case X86::BI__builtin_ia32_gather3siv8sf:
14946  IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14947  break;
14948  case X86::BI__builtin_ia32_gather3siv8si:
14949  IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14950  break;
14951  case X86::BI__builtin_ia32_gathersiv8df:
14952  IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14953  break;
14954  case X86::BI__builtin_ia32_gathersiv16sf:
14955  IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14956  break;
14957  case X86::BI__builtin_ia32_gatherdiv8df:
14958  IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14959  break;
14960  case X86::BI__builtin_ia32_gatherdiv16sf:
14961  IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14962  break;
14963  case X86::BI__builtin_ia32_gathersiv8di:
14964  IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14965  break;
14966  case X86::BI__builtin_ia32_gathersiv16si:
14967  IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14968  break;
14969  case X86::BI__builtin_ia32_gatherdiv8di:
14970  IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14971  break;
14972  case X86::BI__builtin_ia32_gatherdiv16si:
14973  IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14974  break;
14975  }
14976 
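  // The intrinsic's mask carries one bit per element actually moved, so it
  // is sized to the smaller of the passthru and index vector lengths. For
  // example (illustrative), a 128-bit gather of f32 elements through 64-bit
  // indices moves only two elements and thus needs a 2-bit mask. The
  // scatter lowering below sizes its mask the same way.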
14977  unsigned MinElts = std::min(
14978  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14979  cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14980  Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14981  Function *Intr = CGM.getIntrinsic(IID);
14982  return Builder.CreateCall(Intr, Ops);
14983  }
14984 
14985  case X86::BI__builtin_ia32_scattersiv8df:
14986  case X86::BI__builtin_ia32_scattersiv16sf:
14987  case X86::BI__builtin_ia32_scatterdiv8df:
14988  case X86::BI__builtin_ia32_scatterdiv16sf:
14989  case X86::BI__builtin_ia32_scattersiv8di:
14990  case X86::BI__builtin_ia32_scattersiv16si:
14991  case X86::BI__builtin_ia32_scatterdiv8di:
14992  case X86::BI__builtin_ia32_scatterdiv16si:
14993  case X86::BI__builtin_ia32_scatterdiv2df:
14994  case X86::BI__builtin_ia32_scatterdiv2di:
14995  case X86::BI__builtin_ia32_scatterdiv4df:
14996  case X86::BI__builtin_ia32_scatterdiv4di:
14997  case X86::BI__builtin_ia32_scatterdiv4sf:
14998  case X86::BI__builtin_ia32_scatterdiv4si:
14999  case X86::BI__builtin_ia32_scatterdiv8sf:
15000  case X86::BI__builtin_ia32_scatterdiv8si:
15001  case X86::BI__builtin_ia32_scattersiv2df:
15002  case X86::BI__builtin_ia32_scattersiv2di:
15003  case X86::BI__builtin_ia32_scattersiv4df:
15004  case X86::BI__builtin_ia32_scattersiv4di:
15005  case X86::BI__builtin_ia32_scattersiv4sf:
15006  case X86::BI__builtin_ia32_scattersiv4si:
15007  case X86::BI__builtin_ia32_scattersiv8sf:
15008  case X86::BI__builtin_ia32_scattersiv8si: {
15009  Intrinsic::ID IID;
15010  switch (BuiltinID) {
15011  default: llvm_unreachable("Unexpected builtin");
15012  case X86::BI__builtin_ia32_scattersiv8df:
15013  IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15014  break;
15015  case X86::BI__builtin_ia32_scattersiv16sf:
15016  IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15017  break;
15018  case X86::BI__builtin_ia32_scatterdiv8df:
15019  IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15020  break;
15021  case X86::BI__builtin_ia32_scatterdiv16sf:
15022  IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15023  break;
15024  case X86::BI__builtin_ia32_scattersiv8di:
15025  IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15026  break;
15027  case X86::BI__builtin_ia32_scattersiv16si:
15028  IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15029  break;
15030  case X86::BI__builtin_ia32_scatterdiv8di:
15031  IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15032  break;
15033  case X86::BI__builtin_ia32_scatterdiv16si:
15034  IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15035  break;
15036  case X86::BI__builtin_ia32_scatterdiv2df:
15037  IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15038  break;
15039  case X86::BI__builtin_ia32_scatterdiv2di:
15040  IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15041  break;
15042  case X86::BI__builtin_ia32_scatterdiv4df:
15043  IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15044  break;
15045  case X86::BI__builtin_ia32_scatterdiv4di:
15046  IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15047  break;
15048  case X86::BI__builtin_ia32_scatterdiv4sf:
15049  IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15050  break;
15051  case X86::BI__builtin_ia32_scatterdiv4si:
15052  IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15053  break;
15054  case X86::BI__builtin_ia32_scatterdiv8sf:
15055  IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15056  break;
15057  case X86::BI__builtin_ia32_scatterdiv8si:
15058  IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15059  break;
15060  case X86::BI__builtin_ia32_scattersiv2df:
15061  IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15062  break;
15063  case X86::BI__builtin_ia32_scattersiv2di:
15064  IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15065  break;
15066  case X86::BI__builtin_ia32_scattersiv4df:
15067  IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15068  break;
15069  case X86::BI__builtin_ia32_scattersiv4di:
15070  IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15071  break;
15072  case X86::BI__builtin_ia32_scattersiv4sf:
15073  IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15074  break;
15075  case X86::BI__builtin_ia32_scattersiv4si:
15076  IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15077  break;
15078  case X86::BI__builtin_ia32_scattersiv8sf:
15079  IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15080  break;
15081  case X86::BI__builtin_ia32_scattersiv8si:
15082  IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15083  break;
15084  }
15085 
15086  unsigned MinElts = std::min(
15087  cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15088  cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15089  Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15090  Function *Intr = CGM.getIntrinsic(IID);
15091  return Builder.CreateCall(Intr, Ops);
15092  }
15093 
15094  case X86::BI__builtin_ia32_vextractf128_pd256:
15095  case X86::BI__builtin_ia32_vextractf128_ps256:
15096  case X86::BI__builtin_ia32_vextractf128_si256:
15097  case X86::BI__builtin_ia32_extract128i256:
15098  case X86::BI__builtin_ia32_extractf64x4_mask:
15099  case X86::BI__builtin_ia32_extractf32x4_mask:
15100  case X86::BI__builtin_ia32_extracti64x4_mask:
15101  case X86::BI__builtin_ia32_extracti32x4_mask:
15102  case X86::BI__builtin_ia32_extractf32x8_mask:
15103  case X86::BI__builtin_ia32_extracti32x8_mask:
15104  case X86::BI__builtin_ia32_extractf32x4_256_mask:
15105  case X86::BI__builtin_ia32_extracti32x4_256_mask:
15106  case X86::BI__builtin_ia32_extractf64x2_256_mask:
15107  case X86::BI__builtin_ia32_extracti64x2_256_mask:
15108  case X86::BI__builtin_ia32_extractf64x2_512_mask:
15109  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15110  auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15111  unsigned NumElts = DstTy->getNumElements();
15112  unsigned SrcNumElts =
15113  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15114  unsigned SubVectors = SrcNumElts / NumElts;
15115  unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15116  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15117  Index &= SubVectors - 1; // Remove any extra bits.
15118  Index *= NumElts;
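  // Worked example (illustrative): extracting 128-bit subvector 2 of a
  // 512-bit v16f32 gives NumElts == 4 and SubVectors == 4, so Index becomes
  // 2 * 4 == 8 and the shuffle selects elements 8..11.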
15119 
15120  int Indices[16];
15121  for (unsigned i = 0; i != NumElts; ++i)
15122  Indices[i] = i + Index;
15123 
15124  Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15125  "extract");
15126 
15127  if (Ops.size() == 4)
15128  Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15129 
15130  return Res;
15131  }
15132  case X86::BI__builtin_ia32_vinsertf128_pd256:
15133  case X86::BI__builtin_ia32_vinsertf128_ps256:
15134  case X86::BI__builtin_ia32_vinsertf128_si256:
15135  case X86::BI__builtin_ia32_insert128i256:
15136  case X86::BI__builtin_ia32_insertf64x4:
15137  case X86::BI__builtin_ia32_insertf32x4:
15138  case X86::BI__builtin_ia32_inserti64x4:
15139  case X86::BI__builtin_ia32_inserti32x4:
15140  case X86::BI__builtin_ia32_insertf32x8:
15141  case X86::BI__builtin_ia32_inserti32x8:
15142  case X86::BI__builtin_ia32_insertf32x4_256:
15143  case X86::BI__builtin_ia32_inserti32x4_256:
15144  case X86::BI__builtin_ia32_insertf64x2_256:
15145  case X86::BI__builtin_ia32_inserti64x2_256:
15146  case X86::BI__builtin_ia32_insertf64x2_512:
15147  case X86::BI__builtin_ia32_inserti64x2_512: {
15148  unsigned DstNumElts =
15149  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15150  unsigned SrcNumElts =
15151  cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15152  unsigned SubVectors = DstNumElts / SrcNumElts;
15153  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15154  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15155  Index &= SubVectors - 1; // Remove any extra bits.
15156  Index *= SrcNumElts;
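  // Worked example (illustrative): inserting a v4f32 into a v16f32 at
  // position 1 gives Index == 4; the first shuffle widens the 4-element
  // source and the second blends it over elements 4..7 of Ops[0].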
15157 
15158  int Indices[16];
15159  for (unsigned i = 0; i != DstNumElts; ++i)
15160  Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15161 
15162  Value *Op1 = Builder.CreateShuffleVector(
15163  Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15164 
15165  for (unsigned i = 0; i != DstNumElts; ++i) {
15166  if (i >= Index && i < (Index + SrcNumElts))
15167  Indices[i] = (i - Index) + DstNumElts;
15168  else
15169  Indices[i] = i;
15170  }
15171 
15172  return Builder.CreateShuffleVector(Ops[0], Op1,
15173  ArrayRef(Indices, DstNumElts), "insert");
15174  }
15175  case X86::BI__builtin_ia32_pmovqd512_mask:
15176  case X86::BI__builtin_ia32_pmovwb512_mask: {
15177  Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15178  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15179  }
15180  case X86::BI__builtin_ia32_pmovdb512_mask:
15181  case X86::BI__builtin_ia32_pmovdw512_mask:
15182  case X86::BI__builtin_ia32_pmovqw512_mask: {
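  // If the mask is known to be all ones, the masked narrowing move is just
  // a plain truncate.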
15183  if (const auto *C = dyn_cast<Constant>(Ops[2]))
15184  if (C->isAllOnesValue())
15185  return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15186 
15187  Intrinsic::ID IID;
15188  switch (BuiltinID) {
15189  default: llvm_unreachable("Unsupported intrinsic!");
15190  case X86::BI__builtin_ia32_pmovdb512_mask:
15191  IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15192  break;
15193  case X86::BI__builtin_ia32_pmovdw512_mask:
15194  IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15195  break;
15196  case X86::BI__builtin_ia32_pmovqw512_mask:
15197  IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15198  break;
15199  }
15200 
15201  Function *Intr = CGM.getIntrinsic(IID);
15202  return Builder.CreateCall(Intr, Ops);
15203  }
15204  case X86::BI__builtin_ia32_pblendw128:
15205  case X86::BI__builtin_ia32_blendpd:
15206  case X86::BI__builtin_ia32_blendps:
15207  case X86::BI__builtin_ia32_blendpd256:
15208  case X86::BI__builtin_ia32_blendps256:
15209  case X86::BI__builtin_ia32_pblendw256:
15210  case X86::BI__builtin_ia32_pblendd128:
15211  case X86::BI__builtin_ia32_pblendd256: {
15212  unsigned NumElts =
15213  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15214  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15215 
15216  int Indices[16];
15217  // If there are more than 8 elements, the 8-bit immediate repeats for the
15218  // upper elements, so make sure we handle that.
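  // Worked example (illustrative): for pblendw256 (v16i16), Imm == 0xAA is
  // applied to each 8-element half, picking Ops[1] for the odd elements and
  // Ops[0] for the even ones in both halves.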
15219  for (unsigned i = 0; i != NumElts; ++i)
15220  Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15221 
15222  return Builder.CreateShuffleVector(Ops[0], Ops[1],
15223  ArrayRef(Indices, NumElts), "blend");
15224  }
15225  case X86::BI__builtin_ia32_pshuflw:
15226  case X86::BI__builtin_ia32_pshuflw256:
15227  case X86::BI__builtin_ia32_pshuflw512: {
15228  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15229  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15230  unsigned NumElts = Ty->getNumElements();
15231 
15232  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15233  Imm = (Imm & 0xff) * 0x01010101;
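  // Worked example (illustrative): Imm == 0x1B consumes two bits per word
  // and selects words 3,2,1,0 within each lane; words 4..7 of each lane are
  // passed through unchanged.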
15234 
15235  int Indices[32];
15236  for (unsigned l = 0; l != NumElts; l += 8) {
15237  for (unsigned i = 0; i != 4; ++i) {
15238  Indices[l + i] = l + (Imm & 3);
15239  Imm >>= 2;
15240  }
15241  for (unsigned i = 4; i != 8; ++i)
15242  Indices[l + i] = l + i;
15243  }
15244 
15245  return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15246  "pshuflw");
15247  }
15248  case X86::BI__builtin_ia32_pshufhw:
15249  case X86::BI__builtin_ia32_pshufhw256:
15250  case X86::BI__builtin_ia32_pshufhw512: {
15251  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15252  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15253  unsigned NumElts = Ty->getNumElements();
15254 
15255  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15256  Imm = (Imm & 0xff) * 0x01010101;
15257 
15258  int Indices[32];
15259  for (unsigned l = 0; l != NumElts; l += 8) {
15260  for (unsigned i = 0; i != 4; ++i)
15261  Indices[l + i] = l + i;
15262  for (unsigned i = 4; i != 8; ++i) {
15263  Indices[l + i] = l + 4 + (Imm & 3);
15264  Imm >>= 2;
15265  }
15266  }
15267 
15268  return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15269  "pshufhw");
15270  }
15271  case X86::BI__builtin_ia32_pshufd:
15272  case X86::BI__builtin_ia32_pshufd256:
15273  case X86::BI__builtin_ia32_pshufd512:
15274  case X86::BI__builtin_ia32_vpermilpd:
15275  case X86::BI__builtin_ia32_vpermilps:
15276  case X86::BI__builtin_ia32_vpermilpd256:
15277  case X86::BI__builtin_ia32_vpermilps256:
15278  case X86::BI__builtin_ia32_vpermilpd512:
15279  case X86::BI__builtin_ia32_vpermilps512: {
15280  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15281  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15282  unsigned NumElts = Ty->getNumElements();
15283  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15284  unsigned NumLaneElts = NumElts / NumLanes;
15285 
15286  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15287  Imm = (Imm & 0xff) * 0x01010101;
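  // Worked example (illustrative): for pshufd (v4i32, NumLaneElts == 4),
  // Imm == 0x4E decodes to indices 2,3,0,1, i.e. the familiar swap of the
  // two 64-bit halves.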
15288 
15289  int Indices[16];
15290  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15291  for (unsigned i = 0; i != NumLaneElts; ++i) {
15292  Indices[i + l] = (Imm % NumLaneElts) + l;
15293  Imm /= NumLaneElts;
15294  }
15295  }
15296 
15297  return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15298  "permil");
15299  }
15300  case X86::BI__builtin_ia32_shufpd:
15301  case X86::BI__builtin_ia32_shufpd256:
15302  case X86::BI__builtin_ia32_shufpd512:
15303  case X86::BI__builtin_ia32_shufps:
15304  case X86::BI__builtin_ia32_shufps256:
15305  case X86::BI__builtin_ia32_shufps512: {
15306  uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15307  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15308  unsigned NumElts = Ty->getNumElements();
15309  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15310  unsigned NumLaneElts = NumElts / NumLanes;
15311 
15312  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15313  Imm = (Imm & 0xff) * 0x01010101;
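  // Worked example (illustrative): for shufps (v4f32), Imm == 0xE4 decodes
  // to 0,1,2,3, but the upper half of the lane reads from Ops[1], giving
  // { a0, a1, b2, b3 }.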
15314 
15315  int Indices[16];
15316  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15317  for (unsigned i = 0; i != NumLaneElts; ++i) {
15318  unsigned Index = Imm % NumLaneElts;
15319  Imm /= NumLaneElts;
15320  if (i >= (NumLaneElts / 2))
15321  Index += NumElts;
15322  Indices[l + i] = l + Index;
15323  }
15324  }
15325 
15326  return Builder.CreateShuffleVector(Ops[0], Ops[1],
15327  ArrayRef(Indices, NumElts), "shufp");
15328  }
15329  case X86::BI__builtin_ia32_permdi256:
15330  case X86::BI__builtin_ia32_permdf256:
15331  case X86::BI__builtin_ia32_permdi512:
15332  case X86::BI__builtin_ia32_permdf512: {
15333  unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15334  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15335  unsigned NumElts = Ty->getNumElements();
15336 
15337  // These intrinsics operate on 256-bit lanes of four 64-bit elements.
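  // Worked example (illustrative): Imm == 0x4E yields indices 2,3,0,1 in
  // each group of four, swapping the two 128-bit halves of a 256-bit lane.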
15338  int Indices[8];
15339  for (unsigned l = 0; l != NumElts; l += 4)
15340  for (unsigned i = 0; i != 4; ++i)
15341  Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15342 
15343  return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15344  "perm");
15345  }
15346  case X86::BI__builtin_ia32_palignr128:
15347  case X86::BI__builtin_ia32_palignr256:
15348  case X86::BI__builtin_ia32_palignr512: {
15349  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15350 
15351  unsigned NumElts =
15352  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15353  assert(NumElts % 16 == 0);
15354 
15355  // If palignr is shifting the pair of vectors more than the size of two
15356  // lanes, emit zero.
15357  if (ShiftVal >= 32)
15358  return llvm::Constant::getNullValue(ConvertType(E->getType()));
15359 
15360  // If palignr is shifting the pair of input vectors more than one lane,
15361  // but less than two lanes, convert to shifting in zeroes.
15362  if (ShiftVal > 16) {
15363  ShiftVal -= 16;
15364  Ops[1] = Ops[0];
15365  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15366  }
15367 
15368  int Indices[64];
15369  // 256/512-bit palignr operates on 128-bit lanes, so we need to handle that.
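  // Worked example (illustrative): ShiftVal == 4 makes each lane read bytes
  // 4..15 of Ops[1] followed by bytes 0..3 of Ops[0], i.e. the
  // concatenation Ops[0]:Ops[1] shifted right by four bytes.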
15370  for (unsigned l = 0; l != NumElts; l += 16) {
15371  for (unsigned i = 0; i != 16; ++i) {
15372  unsigned Idx = ShiftVal + i;
15373  if (Idx >= 16)
15374  Idx += NumElts - 16; // End of lane, switch operand.
15375  Indices[l + i] = Idx + l;
15376  }
15377  }
15378 
15379  return Builder.CreateShuffleVector(Ops[1], Ops[0],
15380  ArrayRef(Indices, NumElts), "palignr");
15381  }
15382  case X86::BI__builtin_ia32_alignd128:
15383  case X86::BI__builtin_ia32_alignd256:
15384  case X86::BI__builtin_ia32_alignd512:
15385  case X86::BI__builtin_ia32_alignq128:
15386  case X86::BI__builtin_ia32_alignq256:
15387  case X86::BI__builtin_ia32_alignq512: {
15388  unsigned NumElts =
15389  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15390  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15391 
15392  // Mask the shift amount to the width of the vector.
15393  ShiftVal &= NumElts - 1;
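  // Worked example (illustrative): for alignd128 (v4i32), ShiftVal == 1
  // selects { b1, b2, b3, a0 }, i.e. the concatenation a:b shifted right by
  // one element.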
15394 
15395  int Indices[16];
15396  for (unsigned i = 0; i != NumElts; ++i)
15397  Indices[i] = i + ShiftVal;
15398 
15399  return Builder.CreateShuffleVector(Ops[1], Ops[0],
15400  ArrayRef(Indices, NumElts), "valign");
15401  }
15402  case X86::BI__builtin_ia32_shuf_f32x4_256:
15403  case X86::BI__builtin_ia32_shuf_f64x2_256:
15404  case X86::BI__builtin_ia32_shuf_i32x4_256:
15405  case X86::BI__builtin_ia32_shuf_i64x2_256:
15406  case X86::BI__builtin_ia32_shuf_f32x4:
15407  case X86::BI__builtin_ia32_shuf_f64x2:
15408  case X86::BI__builtin_ia32_shuf_i32x4:
15409  case X86::BI__builtin_ia32_shuf_i64x2: {
15410  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15411  auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15412  unsigned NumElts = Ty->getNumElements();
15413  unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15414  unsigned NumLaneElts = NumElts / NumLanes;
15415 
15416  int Indices[16];
15417  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15418  unsigned Index = (Imm % NumLanes) * NumLaneElts;
15419  Imm /= NumLanes; // Discard the bits we just used.
15420  if (l >= (NumElts / 2))
15421  Index += NumElts; // Switch to other source.
15422  for (unsigned i = 0; i != NumLaneElts; ++i) {
15423  Indices[l + i] = Index + i;
15424  }
15425  }
15426 
15427  return Builder.CreateShuffleVector(Ops[0], Ops[1],
15428  ArrayRef(Indices, NumElts), "shuf");
15429  }
15430 
15431  case X86::BI__builtin_ia32_vperm2f128_pd256:
15432  case X86::BI__builtin_ia32_vperm2f128_ps256:
15433  case X86::BI__builtin_ia32_vperm2f128_si256:
15434  case X86::BI__builtin_ia32_permti256: {
15435  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15436  unsigned NumElts =
15437  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15438 
15439  // This takes a very simple approach: there are two lanes, and a shuffle
15440  // can have two inputs, so we reserve the first input for the first lane
15441  // and the second input for the second lane. This may result in duplicate
15442  // sources, but the backend can deal with that.
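  // Worked example (illustrative): Imm == 0x20 selects the low half of
  // Ops[0] for lane 0 and the low half of Ops[1] for lane 1, a plain
  // concatenation of the two low halves.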
15443 
15444  Value *OutOps[2];
15445  int Indices[8];
15446  for (unsigned l = 0; l != 2; ++l) {
15447  // Determine the source for this lane.
15448  if (Imm & (1 << ((l * 4) + 3)))
15449  OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15450  else if (Imm & (1 << ((l * 4) + 1)))
15451  OutOps[l] = Ops[1];
15452  else
15453  OutOps[l] = Ops[0];
15454 
15455  for (unsigned i = 0; i != NumElts/2; ++i) {
15456  // Start with ith element of the source for this lane.
15457  unsigned Idx = (l * NumElts) + i;
15458  // If bit 0 of the immediate half is set, switch to the high half of
15459  // the source.
15460  if (Imm & (1 << (l * 4)))
15461  Idx += NumElts/2;
15462  Indices[(l * (NumElts/2)) + i] = Idx;
15463  }
15464  }
15465 
15466  return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15467  ArrayRef(Indices, NumElts), "vperm");
15468  }
15469 
15470  case X86::BI__builtin_ia32_pslldqi128_byteshift:
15471  case X86::BI__builtin_ia32_pslldqi256_byteshift:
15472  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15473  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15474  auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15475  // The builtin type is vXi64, so multiply by 8 to get the byte count.
15476  unsigned NumElts = ResultType->getNumElements() * 8;
15477 
15478  // If pslldq is shifting the vector more than 15 bytes, emit zero.
15479  if (ShiftVal >= 16)
15480  return llvm::Constant::getNullValue(ResultType);
15481 
15482  int Indices[64];
15483  // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
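  // Worked example (illustrative): ShiftVal == 4 turns each 16-byte lane
  // into { 0, 0, 0, 0, b0..b11 }, a byte shift toward the more significant
  // end; the psrldq case below mirrors this in the other direction.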
15484  for (unsigned l = 0; l != NumElts; l += 16) {
15485  for (unsigned i = 0; i != 16; ++i) {
15486  unsigned Idx = NumElts + i - ShiftVal;
15487  if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15488  Indices[l + i] = Idx + l;
15489  }
15490  }
15491 
15492  auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15493  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15494  Value *Zero = llvm::Constant::getNullValue(VecTy);
15495  Value *SV = Builder.CreateShuffleVector(
15496  Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15497  return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15498  }
15499  case X86::BI__builtin_ia32_psrldqi128_byteshift:
15500  case X86::BI__builtin_ia32_psrldqi256_byteshift:
15501  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15502  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15503  auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15504  // The builtin type is vXi64, so multiply by 8 to get the byte count.
15505  unsigned NumElts = ResultType->getNumElements() * 8;
15506 
15507  // If psrldq is shifting the vector more than 15 bytes, emit zero.
15508  if (ShiftVal >= 16)
15509  return llvm::Constant::getNullValue(ResultType);
15510 
15511  int Indices[64];
15512  // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15513  for (unsigned l = 0; l != NumElts; l += 16) {
15514  for (unsigned i = 0; i != 16; ++i) {
15515  unsigned Idx = i + ShiftVal;
15516  if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15517  Indices[l + i] = Idx + l;
15518  }
15519  }
15520 
15521  auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15522  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15523  Value *Zero = llvm::Constant::getNullValue(VecTy);
15524  Value *SV = Builder.CreateShuffleVector(
15525  Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15526  return Builder.CreateBitCast(SV, ResultType, "cast");
15527  }
15528  case X86::BI__builtin_ia32_kshiftliqi:
15529  case X86::BI__builtin_ia32_kshiftlihi:
15530  case X86::BI__builtin_ia32_kshiftlisi:
15531  case X86::BI__builtin_ia32_kshiftlidi: {
15532  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15533  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15534 
15535  if (ShiftVal >= NumElts)
15536  return llvm::Constant::getNullValue(Ops[0]->getType());
15537 
15538  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15539 
15540  int Indices[64];
15541  for (unsigned i = 0; i != NumElts; ++i)
15542  Indices[i] = NumElts + i - ShiftVal;
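  // The shuffle reads In[i - ShiftVal] for i >= ShiftVal and a zero element
  // otherwise, which is exactly a left shift of the mask bits.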
15543 
15544  Value *Zero = llvm::Constant::getNullValue(In->getType());
15545  Value *SV = Builder.CreateShuffleVector(
15546  Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15547  return Builder.CreateBitCast(SV, Ops[0]->getType());
15548  }
15549  case X86::BI__builtin_ia32_kshiftriqi:
15550  case X86::BI__builtin_ia32_kshiftrihi:
15551  case X86::BI__builtin_ia32_kshiftrisi:
15552  case X86::BI__builtin_ia32_kshiftridi: {
15553  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15554  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15555 
15556  if (ShiftVal >= NumElts)
15557  return llvm::Constant::getNullValue(Ops[0]->getType());
15558 
15559  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15560 
15561  int Indices[64];
15562  for (unsigned i = 0; i != NumElts; ++i)
15563  Indices[i] = i + ShiftVal;
15564 
15565  Value *Zero = llvm::Constant::getNullValue(In->getType());
15566  Value *SV = Builder.CreateShuffleVector(
15567  In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15568  return Builder.CreateBitCast(SV, Ops[0]->getType());
15569  }
15570  case X86::BI__builtin_ia32_movnti:
15571  case X86::BI__builtin_ia32_movnti64:
15572  case X86::BI__builtin_ia32_movntsd:
15573  case X86::BI__builtin_ia32_movntss: {
15574  llvm::MDNode *Node = llvm::MDNode::get(
15575  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15576 
15577  Value *Ptr = Ops[0];
15578  Value *Src = Ops[1];
15579 
15580  // Extract the 0th element of the source vector.
15581  if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15582  BuiltinID == X86::BI__builtin_ia32_movntss)
15583  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15584 
15585  // Unaligned nontemporal store of the scalar value.
15586  StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15587  SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15588  SI->setAlignment(llvm::Align(1));
15589  return SI;
15590  }
15591  // Rotate is a special case of funnel shift: the first two args are the same.
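  // e.g. rotl(x, c) == fshl(x, x, c) and rotr(x, c) == fshr(x, x, c).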
15592  case X86::BI__builtin_ia32_vprotb:
15593  case X86::BI__builtin_ia32_vprotw:
15594  case X86::BI__builtin_ia32_vprotd:
15595  case X86::BI__builtin_ia32_vprotq:
15596  case X86::BI__builtin_ia32_vprotbi:
15597  case X86::BI__builtin_ia32_vprotwi:
15598  case X86::BI__builtin_ia32_vprotdi:
15599  case X86::BI__builtin_ia32_vprotqi:
15600  case X86::BI__builtin_ia32_prold128:
15601  case X86::BI__builtin_ia32_prold256:
15602  case X86::BI__builtin_ia32_prold512:
15603  case X86::BI__builtin_ia32_prolq128:
15604  case X86::BI__builtin_ia32_prolq256:
15605  case X86::BI__builtin_ia32_prolq512:
15606  case X86::BI__builtin_ia32_prolvd128:
15607  case X86::BI__builtin_ia32_prolvd256:
15608  case X86::BI__builtin_ia32_prolvd512:
15609  case X86::BI__builtin_ia32_prolvq128:
15610  case X86::BI__builtin_ia32_prolvq256:
15611  case X86::BI__builtin_ia32_prolvq512:
15612  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15613  case X86::BI__builtin_ia32_prord128:
15614  case X86::BI__builtin_ia32_prord256:
15615  case X86::BI__builtin_ia32_prord512:
15616  case X86::BI__builtin_ia32_prorq128:
15617  case X86::BI__builtin_ia32_prorq256:
15618  case X86::BI__builtin_ia32_prorq512:
15619  case X86::BI__builtin_ia32_prorvd128:
15620  case X86::BI__builtin_ia32_prorvd256:
15621  case X86::BI__builtin_ia32_prorvd512:
15622  case X86::BI__builtin_ia32_prorvq128:
15623  case X86::BI__builtin_ia32_prorvq256:
15624  case X86::BI__builtin_ia32_prorvq512:
15625  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15626  case X86::BI__builtin_ia32_selectb_128:
15627  case X86::BI__builtin_ia32_selectb_256:
15628  case X86::BI__builtin_ia32_selectb_512:
15629  case X86::BI__builtin_ia32_selectw_128:
15630  case X86::BI__builtin_ia32_selectw_256:
15631  case X86::BI__builtin_ia32_selectw_512:
15632  case X86::BI__builtin_ia32_selectd_128:
15633  case X86::BI__builtin_ia32_selectd_256:
15634  case X86::BI__builtin_ia32_selectd_512:
15635  case X86::BI__builtin_ia32_selectq_128:
15636  case X86::BI__builtin_ia32_selectq_256:
15637  case X86::BI__builtin_ia32_selectq_512:
15638  case X86::BI__builtin_ia32_selectph_128:
15639  case X86::BI__builtin_ia32_selectph_256:
15640  case X86::BI__builtin_ia32_selectph_512:
15641  case X86::BI__builtin_ia32_selectpbf_128:
15642  case X86::BI__builtin_ia32_selectpbf_256:
15643  case X86::BI__builtin_ia32_selectpbf_512:
15644  case X86::BI__builtin_ia32_selectps_128:
15645  case X86::BI__builtin_ia32_selectps_256:
15646  case X86::BI__builtin_ia32_selectps_512:
15647  case X86::BI__builtin_ia32_selectpd_128:
15648  case X86::BI__builtin_ia32_selectpd_256:
15649  case X86::BI__builtin_ia32_selectpd_512:
15650  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15651  case X86::BI__builtin_ia32_selectsh_128:
15652  case X86::BI__builtin_ia32_selectsbf_128:
15653  case X86::BI__builtin_ia32_selectss_128:
15654  case X86::BI__builtin_ia32_selectsd_128: {
15655  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15656  Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15657  A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15658  return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15659  }
15660  case X86::BI__builtin_ia32_cmpb128_mask:
15661  case X86::BI__builtin_ia32_cmpb256_mask:
15662  case X86::BI__builtin_ia32_cmpb512_mask:
15663  case X86::BI__builtin_ia32_cmpw128_mask:
15664  case X86::BI__builtin_ia32_cmpw256_mask:
15665  case X86::BI__builtin_ia32_cmpw512_mask:
15666  case X86::BI__builtin_ia32_cmpd128_mask:
15667  case X86::BI__builtin_ia32_cmpd256_mask:
15668  case X86::BI__builtin_ia32_cmpd512_mask:
15669  case X86::BI__builtin_ia32_cmpq128_mask:
15670  case X86::BI__builtin_ia32_cmpq256_mask:
15671  case X86::BI__builtin_ia32_cmpq512_mask: {
15672  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15673  return EmitX86MaskedCompare(*this, CC, true, Ops);
15674  }
15675  case X86::BI__builtin_ia32_ucmpb128_mask:
15676  case X86::BI__builtin_ia32_ucmpb256_mask:
15677  case X86::BI__builtin_ia32_ucmpb512_mask:
15678  case X86::BI__builtin_ia32_ucmpw128_mask:
15679  case X86::BI__builtin_ia32_ucmpw256_mask:
15680  case X86::BI__builtin_ia32_ucmpw512_mask:
15681  case X86::BI__builtin_ia32_ucmpd128_mask:
15682  case X86::BI__builtin_ia32_ucmpd256_mask:
15683  case X86::BI__builtin_ia32_ucmpd512_mask:
15684  case X86::BI__builtin_ia32_ucmpq128_mask:
15685  case X86::BI__builtin_ia32_ucmpq256_mask:
15686  case X86::BI__builtin_ia32_ucmpq512_mask: {
15687  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15688  return EmitX86MaskedCompare(*this, CC, false, Ops);
15689  }
15690  case X86::BI__builtin_ia32_vpcomb:
15691  case X86::BI__builtin_ia32_vpcomw:
15692  case X86::BI__builtin_ia32_vpcomd:
15693  case X86::BI__builtin_ia32_vpcomq:
15694  return EmitX86vpcom(*this, Ops, true);
15695  case X86::BI__builtin_ia32_vpcomub:
15696  case X86::BI__builtin_ia32_vpcomuw:
15697  case X86::BI__builtin_ia32_vpcomud:
15698  case X86::BI__builtin_ia32_vpcomuq:
15699  return EmitX86vpcom(*this, Ops, false);
15700 
15701  case X86::BI__builtin_ia32_kortestcqi:
15702  case X86::BI__builtin_ia32_kortestchi:
15703  case X86::BI__builtin_ia32_kortestcsi:
15704  case X86::BI__builtin_ia32_kortestcdi: {
15705  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15706  Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15707  Value *Cmp = Builder.CreateICmpEQ(Or, C);
15708  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15709  }
15710  case X86::BI__builtin_ia32_kortestzqi:
15711  case X86::BI__builtin_ia32_kortestzhi:
15712  case X86::BI__builtin_ia32_kortestzsi:
15713  case X86::BI__builtin_ia32_kortestzdi: {
15714  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15715  Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15716  Value *Cmp = Builder.CreateICmpEQ(Or, C);
15717  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15718  }
15719 
15720  case X86::BI__builtin_ia32_ktestcqi:
15721  case X86::BI__builtin_ia32_ktestzqi:
15722  case X86::BI__builtin_ia32_ktestchi:
15723  case X86::BI__builtin_ia32_ktestzhi:
15724  case X86::BI__builtin_ia32_ktestcsi:
15725  case X86::BI__builtin_ia32_ktestzsi:
15726  case X86::BI__builtin_ia32_ktestcdi:
15727  case X86::BI__builtin_ia32_ktestzdi: {
15728  Intrinsic::ID IID;
15729  switch (BuiltinID) {
15730  default: llvm_unreachable("Unsupported intrinsic!");
15731  case X86::BI__builtin_ia32_ktestcqi:
15732  IID = Intrinsic::x86_avx512_ktestc_b;
15733  break;
15734  case X86::BI__builtin_ia32_ktestzqi:
15735  IID = Intrinsic::x86_avx512_ktestz_b;
15736  break;
15737  case X86::BI__builtin_ia32_ktestchi:
15738  IID = Intrinsic::x86_avx512_ktestc_w;
15739  break;
15740  case X86::BI__builtin_ia32_ktestzhi:
15741  IID = Intrinsic::x86_avx512_ktestz_w;
15742  break;
15743  case X86::BI__builtin_ia32_ktestcsi:
15744  IID = Intrinsic::x86_avx512_ktestc_d;
15745  break;
15746  case X86::BI__builtin_ia32_ktestzsi:
15747  IID = Intrinsic::x86_avx512_ktestz_d;
15748  break;
15749  case X86::BI__builtin_ia32_ktestcdi:
15750  IID = Intrinsic::x86_avx512_ktestc_q;
15751  break;
15752  case X86::BI__builtin_ia32_ktestzdi:
15753  IID = Intrinsic::x86_avx512_ktestz_q;
15754  break;
15755  }
15756 
15757  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15758  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15759  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15760  Function *Intr = CGM.getIntrinsic(IID);
15761  return Builder.CreateCall(Intr, {LHS, RHS});
15762  }
15763 
15764  case X86::BI__builtin_ia32_kaddqi:
15765  case X86::BI__builtin_ia32_kaddhi:
15766  case X86::BI__builtin_ia32_kaddsi:
15767  case X86::BI__builtin_ia32_kadddi: {
15768  Intrinsic::ID IID;
15769  switch (BuiltinID) {
15770  default: llvm_unreachable("Unsupported intrinsic!");
15771  case X86::BI__builtin_ia32_kaddqi:
15772  IID = Intrinsic::x86_avx512_kadd_b;
15773  break;
15774  case X86::BI__builtin_ia32_kaddhi:
15775  IID = Intrinsic::x86_avx512_kadd_w;
15776  break;
15777  case X86::BI__builtin_ia32_kaddsi:
15778  IID = Intrinsic::x86_avx512_kadd_d;
15779  break;
15780  case X86::BI__builtin_ia32_kadddi:
15781  IID = Intrinsic::x86_avx512_kadd_q;
15782  break;
15783  }
15784 
15785  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15786  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15787  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15788  Function *Intr = CGM.getIntrinsic(IID);
15789  Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15790  return Builder.CreateBitCast(Res, Ops[0]->getType());
15791  }
15792  case X86::BI__builtin_ia32_kandqi:
15793  case X86::BI__builtin_ia32_kandhi:
15794  case X86::BI__builtin_ia32_kandsi:
15795  case X86::BI__builtin_ia32_kanddi:
15796  return EmitX86MaskLogic(*this, Instruction::And, Ops);
15797  case X86::BI__builtin_ia32_kandnqi:
15798  case X86::BI__builtin_ia32_kandnhi:
15799  case X86::BI__builtin_ia32_kandnsi:
15800  case X86::BI__builtin_ia32_kandndi:
15801  return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15802  case X86::BI__builtin_ia32_korqi:
15803  case X86::BI__builtin_ia32_korhi:
15804  case X86::BI__builtin_ia32_korsi:
15805  case X86::BI__builtin_ia32_kordi:
15806  return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15807  case X86::BI__builtin_ia32_kxnorqi:
15808  case X86::BI__builtin_ia32_kxnorhi:
15809  case X86::BI__builtin_ia32_kxnorsi:
15810  case X86::BI__builtin_ia32_kxnordi:
15811  return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15812  case X86::BI__builtin_ia32_kxorqi:
15813  case X86::BI__builtin_ia32_kxorhi:
15814  case X86::BI__builtin_ia32_kxorsi:
15815  case X86::BI__builtin_ia32_kxordi:
15816  return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15817  case X86::BI__builtin_ia32_knotqi:
15818  case X86::BI__builtin_ia32_knothi:
15819  case X86::BI__builtin_ia32_knotsi:
15820  case X86::BI__builtin_ia32_knotdi: {
15821  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15822  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15823  return Builder.CreateBitCast(Builder.CreateNot(Res),
15824  Ops[0]->getType());
15825  }
15826  case X86::BI__builtin_ia32_kmovb:
15827  case X86::BI__builtin_ia32_kmovw:
15828  case X86::BI__builtin_ia32_kmovd:
15829  case X86::BI__builtin_ia32_kmovq: {
15830  // Bitcast to vXi1 type and then back to integer. This gets the mask
15831  // register type into the IR, but might be optimized out depending on
15832  // what's around it.
15833  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15834  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15835  return Builder.CreateBitCast(Res, Ops[0]->getType());
15836  }
15837 
15838  case X86::BI__builtin_ia32_kunpckdi:
15839  case X86::BI__builtin_ia32_kunpcksi:
15840  case X86::BI__builtin_ia32_kunpckhi: {
15841  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15842  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15843  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15844  int Indices[64];
15845  for (unsigned i = 0; i != NumElts; ++i)
15846  Indices[i] = i;
15847 
15848  // First extract half of each vector. This gives better codegen than
15849  // doing it in a single shuffle.
15850  LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15851  RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15852  // Concat the vectors.
15853  // NOTE: Operands are swapped to match the intrinsic definition.
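  // e.g. (per the instruction definition) for kunpckhi the low half of the
  // 16-bit result comes from Ops[1] and the high half from Ops[0], hence
  // RHS goes first in the concatenation.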
15854  Value *Res =
15855  Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15856  return Builder.CreateBitCast(Res, Ops[0]->getType());
15857  }
15858 
15859  case X86::BI__builtin_ia32_vplzcntd_128:
15860  case X86::BI__builtin_ia32_vplzcntd_256:
15861  case X86::BI__builtin_ia32_vplzcntd_512:
15862  case X86::BI__builtin_ia32_vplzcntq_128:
15863  case X86::BI__builtin_ia32_vplzcntq_256:
15864  case X86::BI__builtin_ia32_vplzcntq_512: {
15865  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15866  return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15867  }
15868  case X86::BI__builtin_ia32_sqrtss:
15869  case X86::BI__builtin_ia32_sqrtsd: {
15870  Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15871  Function *F;
15872  if (Builder.getIsFPConstrained()) {
15873  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15874  F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15875  A->getType());
15876  A = Builder.CreateConstrainedFPCall(F, {A});
15877  } else {
15878  F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15879  A = Builder.CreateCall(F, {A});
15880  }
15881  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15882  }
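// Sketch of the unconstrained path (illustrative, assuming <4 x float>):
//   %a = extractelement <4 x float> %op0, i64 0
//   %s = call float @llvm.sqrt.f32(float %a)
//   %r = insertelement <4 x float> %op0, float %s, i64 0
// Only element 0 is replaced; the remaining elements pass through unchanged.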
15883  case X86::BI__builtin_ia32_sqrtsh_round_mask:
15884  case X86::BI__builtin_ia32_sqrtsd_round_mask:
15885  case X86::BI__builtin_ia32_sqrtss_round_mask: {
15886  unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15887  // Lower to a plain sqrt only if the rounding mode is 4 (AKA
15888  // CUR_DIRECTION); otherwise keep the intrinsic.
15889  if (CC != 4) {
15890  Intrinsic::ID IID;
15891 
15892  switch (BuiltinID) {
15893  default:
15894  llvm_unreachable("Unsupported intrinsic!");
15895  case X86::BI__builtin_ia32_sqrtsh_round_mask:
15896  IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15897  break;
15898  case X86::BI__builtin_ia32_sqrtsd_round_mask:
15899  IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15900  break;
15901  case X86::BI__builtin_ia32_sqrtss_round_mask:
15902  IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15903  break;
15904  }
15905  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15906  }
15907  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15908  Function *F;
15909  if (Builder.getIsFPConstrained()) {
15910  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15911  F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15912  A->getType());
15913  A = Builder.CreateConstrainedFPCall(F, A);
15914  } else {
15915  F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15916  A = Builder.CreateCall(F, A);
15917  }
15918  Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15919  A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15920  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15921  }
15922  case X86::BI__builtin_ia32_sqrtpd256:
15923  case X86::BI__builtin_ia32_sqrtpd:
15924  case X86::BI__builtin_ia32_sqrtps256:
15925  case X86::BI__builtin_ia32_sqrtps:
15926  case X86::BI__builtin_ia32_sqrtph256:
15927  case X86::BI__builtin_ia32_sqrtph:
15928  case X86::BI__builtin_ia32_sqrtph512:
15929  case X86::BI__builtin_ia32_sqrtps512:
15930  case X86::BI__builtin_ia32_sqrtpd512: {
15931  if (Ops.size() == 2) {
15932  unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15933  // Lower to a plain sqrt only if the rounding mode is 4 (AKA
15934  // CUR_DIRECTION); otherwise keep the intrinsic.
15935  if (CC != 4) {
15936  Intrinsic::ID IID;
15937 
15938  switch (BuiltinID) {
15939  default:
15940  llvm_unreachable("Unsupported intrinsic!");
15941  case X86::BI__builtin_ia32_sqrtph512:
15942  IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15943  break;
15944  case X86::BI__builtin_ia32_sqrtps512:
15945  IID = Intrinsic::x86_avx512_sqrt_ps_512;
15946  break;
15947  case X86::BI__builtin_ia32_sqrtpd512:
15948  IID = Intrinsic::x86_avx512_sqrt_pd_512;
15949  break;
15950  }
15951  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15952  }
15953  }
15954  if (Builder.getIsFPConstrained()) {
15955  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15956  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15957  Ops[0]->getType());
15958  return Builder.CreateConstrainedFPCall(F, Ops[0]);
15959  } else {
15960  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15961  return Builder.CreateCall(F, Ops[0]);
15962  }
15963  }
15964 
15965  case X86::BI__builtin_ia32_pmuludq128:
15966  case X86::BI__builtin_ia32_pmuludq256:
15967  case X86::BI__builtin_ia32_pmuludq512:
15968  return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15969 
15970  case X86::BI__builtin_ia32_pmuldq128:
15971  case X86::BI__builtin_ia32_pmuldq256:
15972  case X86::BI__builtin_ia32_pmuldq512:
15973  return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15974 
15975  case X86::BI__builtin_ia32_pternlogd512_mask:
15976  case X86::BI__builtin_ia32_pternlogq512_mask:
15977  case X86::BI__builtin_ia32_pternlogd128_mask:
15978  case X86::BI__builtin_ia32_pternlogd256_mask:
15979  case X86::BI__builtin_ia32_pternlogq128_mask:
15980  case X86::BI__builtin_ia32_pternlogq256_mask:
15981  return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15982 
15983  case X86::BI__builtin_ia32_pternlogd512_maskz:
15984  case X86::BI__builtin_ia32_pternlogq512_maskz:
15985  case X86::BI__builtin_ia32_pternlogd128_maskz:
15986  case X86::BI__builtin_ia32_pternlogd256_maskz:
15987  case X86::BI__builtin_ia32_pternlogq128_maskz:
15988  case X86::BI__builtin_ia32_pternlogq256_maskz:
15989  return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15990 
15991  case X86::BI__builtin_ia32_vpshldd128:
15992  case X86::BI__builtin_ia32_vpshldd256:
15993  case X86::BI__builtin_ia32_vpshldd512:
15994  case X86::BI__builtin_ia32_vpshldq128:
15995  case X86::BI__builtin_ia32_vpshldq256:
15996  case X86::BI__builtin_ia32_vpshldq512:
15997  case X86::BI__builtin_ia32_vpshldw128:
15998  case X86::BI__builtin_ia32_vpshldw256:
15999  case X86::BI__builtin_ia32_vpshldw512:
16000  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16001 
16002  case X86::BI__builtin_ia32_vpshrdd128:
16003  case X86::BI__builtin_ia32_vpshrdd256:
16004  case X86::BI__builtin_ia32_vpshrdd512:
16005  case X86::BI__builtin_ia32_vpshrdq128:
16006  case X86::BI__builtin_ia32_vpshrdq256:
16007  case X86::BI__builtin_ia32_vpshrdq512:
16008  case X86::BI__builtin_ia32_vpshrdw128:
16009  case X86::BI__builtin_ia32_vpshrdw256:
16010  case X86::BI__builtin_ia32_vpshrdw512:
16011  // Ops 0 and 1 are swapped.
16012  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16013 
16014  case X86::BI__builtin_ia32_vpshldvd128:
16015  case X86::BI__builtin_ia32_vpshldvd256:
16016  case X86::BI__builtin_ia32_vpshldvd512:
16017  case X86::BI__builtin_ia32_vpshldvq128:
16018  case X86::BI__builtin_ia32_vpshldvq256:
16019  case X86::BI__builtin_ia32_vpshldvq512:
16020  case X86::BI__builtin_ia32_vpshldvw128:
16021  case X86::BI__builtin_ia32_vpshldvw256:
16022  case X86::BI__builtin_ia32_vpshldvw512:
16023  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16024 
16025  case X86::BI__builtin_ia32_vpshrdvd128:
16026  case X86::BI__builtin_ia32_vpshrdvd256:
16027  case X86::BI__builtin_ia32_vpshrdvd512:
16028  case X86::BI__builtin_ia32_vpshrdvq128:
16029  case X86::BI__builtin_ia32_vpshrdvq256:
16030  case X86::BI__builtin_ia32_vpshrdvq512:
16031  case X86::BI__builtin_ia32_vpshrdvw128:
16032  case X86::BI__builtin_ia32_vpshrdvw256:
16033  case X86::BI__builtin_ia32_vpshrdvw512:
16034  // Ops 0 and 1 are swapped.
16035  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16036 
16037  // Reductions
16038  case X86::BI__builtin_ia32_reduce_fadd_pd512:
16039  case X86::BI__builtin_ia32_reduce_fadd_ps512:
16040  case X86::BI__builtin_ia32_reduce_fadd_ph512:
16041  case X86::BI__builtin_ia32_reduce_fadd_ph256:
16042  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16043  Function *F =
16044  CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16045  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16046  Builder.getFastMathFlags().setAllowReassoc();
16047  return Builder.CreateCall(F, {Ops[0], Ops[1]});
16048  }
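// Illustrative note: llvm.vector.reduce.fadd takes the scalar accumulator
// first, so for a 512-bit float reduction this is roughly
//   call reassoc float @llvm.vector.reduce.fadd.v16f32(float %acc,
//                                                      <16 x float> %v)
// where the reassoc flag set above licenses the unordered (tree-shaped)
// reduction these builtins are specified to perform.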
16049  case X86::BI__builtin_ia32_reduce_fmul_pd512:
16050  case X86::BI__builtin_ia32_reduce_fmul_ps512:
16051  case X86::BI__builtin_ia32_reduce_fmul_ph512:
16052  case X86::BI__builtin_ia32_reduce_fmul_ph256:
16053  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16054  Function *F =
16055  CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16056  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16057  Builder.getFastMathFlags().setAllowReassoc();
16058  return Builder.CreateCall(F, {Ops[0], Ops[1]});
16059  }
16060  case X86::BI__builtin_ia32_reduce_fmax_pd512:
16061  case X86::BI__builtin_ia32_reduce_fmax_ps512:
16062  case X86::BI__builtin_ia32_reduce_fmax_ph512:
16063  case X86::BI__builtin_ia32_reduce_fmax_ph256:
16064  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16065  Function *F =
16066  CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16067  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16068  Builder.getFastMathFlags().setNoNaNs();
16069  return Builder.CreateCall(F, {Ops[0]});
16070  }
16071  case X86::BI__builtin_ia32_reduce_fmin_pd512:
16072  case X86::BI__builtin_ia32_reduce_fmin_ps512:
16073  case X86::BI__builtin_ia32_reduce_fmin_ph512:
16074  case X86::BI__builtin_ia32_reduce_fmin_ph256:
16075  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16076  Function *F =
16077  CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16078  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16079  Builder.getFastMathFlags().setNoNaNs();
16080  return Builder.CreateCall(F, {Ops[0]});
16081  }
16082 
16083  // 3DNow!
16084  case X86::BI__builtin_ia32_pswapdsf:
16085  case X86::BI__builtin_ia32_pswapdsi: {
16086  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
16087  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
16088  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
16089  return Builder.CreateCall(F, Ops, "pswapd");
16090  }
16091  case X86::BI__builtin_ia32_rdrand16_step:
16092  case X86::BI__builtin_ia32_rdrand32_step:
16093  case X86::BI__builtin_ia32_rdrand64_step:
16094  case X86::BI__builtin_ia32_rdseed16_step:
16095  case X86::BI__builtin_ia32_rdseed32_step:
16096  case X86::BI__builtin_ia32_rdseed64_step: {
16097  Intrinsic::ID ID;
16098  switch (BuiltinID) {
16099  default: llvm_unreachable("Unsupported intrinsic!");
16100  case X86::BI__builtin_ia32_rdrand16_step:
16101  ID = Intrinsic::x86_rdrand_16;
16102  break;
16103  case X86::BI__builtin_ia32_rdrand32_step:
16104  ID = Intrinsic::x86_rdrand_32;
16105  break;
16106  case X86::BI__builtin_ia32_rdrand64_step:
16107  ID = Intrinsic::x86_rdrand_64;
16108  break;
16109  case X86::BI__builtin_ia32_rdseed16_step:
16110  ID = Intrinsic::x86_rdseed_16;
16111  break;
16112  case X86::BI__builtin_ia32_rdseed32_step:
16113  ID = Intrinsic::x86_rdseed_32;
16114  break;
16115  case X86::BI__builtin_ia32_rdseed64_step:
16116  ID = Intrinsic::x86_rdseed_64;
16117  break;
16118  }
16119 
16120  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16121  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16122  Ops[0]);
16123  return Builder.CreateExtractValue(Call, 1);
16124  }
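// Illustrative note: the rdrand/rdseed intrinsics return a { value, flag }
// pair; the value is stored through the pointer operand and the i32 flag
// (1 on success, 0 when no entropy was available) is the builtin's result.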
16125  case X86::BI__builtin_ia32_addcarryx_u32:
16126  case X86::BI__builtin_ia32_addcarryx_u64:
16127  case X86::BI__builtin_ia32_subborrow_u32:
16128  case X86::BI__builtin_ia32_subborrow_u64: {
16129  Intrinsic::ID IID;
16130  switch (BuiltinID) {
16131  default: llvm_unreachable("Unsupported intrinsic!");
16132  case X86::BI__builtin_ia32_addcarryx_u32:
16133  IID = Intrinsic::x86_addcarry_32;
16134  break;
16135  case X86::BI__builtin_ia32_addcarryx_u64:
16136  IID = Intrinsic::x86_addcarry_64;
16137  break;
16138  case X86::BI__builtin_ia32_subborrow_u32:
16139  IID = Intrinsic::x86_subborrow_32;
16140  break;
16141  case X86::BI__builtin_ia32_subborrow_u64:
16142  IID = Intrinsic::x86_subborrow_64;
16143  break;
16144  }
16145 
16146  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16147  { Ops[0], Ops[1], Ops[2] });
16148  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16149  Ops[3]);
16150  return Builder.CreateExtractValue(Call, 0);
16151  }
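// Illustrative note: the addcarry/subborrow intrinsics return the pair
// { carry-out, result }, so index 1 is stored through the out-pointer in
// Ops[3] and the new carry (index 0) is returned so calls can be chained.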
16152 
16153  case X86::BI__builtin_ia32_fpclassps128_mask:
16154  case X86::BI__builtin_ia32_fpclassps256_mask:
16155  case X86::BI__builtin_ia32_fpclassps512_mask:
16156  case X86::BI__builtin_ia32_fpclassph128_mask:
16157  case X86::BI__builtin_ia32_fpclassph256_mask:
16158  case X86::BI__builtin_ia32_fpclassph512_mask:
16159  case X86::BI__builtin_ia32_fpclasspd128_mask:
16160  case X86::BI__builtin_ia32_fpclasspd256_mask:
16161  case X86::BI__builtin_ia32_fpclasspd512_mask: {
16162  unsigned NumElts =
16163  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16164  Value *MaskIn = Ops[2];
16165  Ops.erase(&Ops[2]);
16166 
16167  Intrinsic::ID ID;
16168  switch (BuiltinID) {
16169  default: llvm_unreachable("Unsupported intrinsic!");
16170  case X86::BI__builtin_ia32_fpclassph128_mask:
16171  ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16172  break;
16173  case X86::BI__builtin_ia32_fpclassph256_mask:
16174  ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16175  break;
16176  case X86::BI__builtin_ia32_fpclassph512_mask:
16177  ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16178  break;
16179  case X86::BI__builtin_ia32_fpclassps128_mask:
16180  ID = Intrinsic::x86_avx512_fpclass_ps_128;
16181  break;
16182  case X86::BI__builtin_ia32_fpclassps256_mask:
16183  ID = Intrinsic::x86_avx512_fpclass_ps_256;
16184  break;
16185  case X86::BI__builtin_ia32_fpclassps512_mask:
16186  ID = Intrinsic::x86_avx512_fpclass_ps_512;
16187  break;
16188  case X86::BI__builtin_ia32_fpclasspd128_mask:
16189  ID = Intrinsic::x86_avx512_fpclass_pd_128;
16190  break;
16191  case X86::BI__builtin_ia32_fpclasspd256_mask:
16192  ID = Intrinsic::x86_avx512_fpclass_pd_256;
16193  break;
16194  case X86::BI__builtin_ia32_fpclasspd512_mask:
16195  ID = Intrinsic::x86_avx512_fpclass_pd_512;
16196  break;
16197  }
16198 
16199  Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16200  return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16201  }
16202 
16203  case X86::BI__builtin_ia32_vp2intersect_q_512:
16204  case X86::BI__builtin_ia32_vp2intersect_q_256:
16205  case X86::BI__builtin_ia32_vp2intersect_q_128:
16206  case X86::BI__builtin_ia32_vp2intersect_d_512:
16207  case X86::BI__builtin_ia32_vp2intersect_d_256:
16208  case X86::BI__builtin_ia32_vp2intersect_d_128: {
16209  unsigned NumElts =
16210  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16211  Intrinsic::ID ID;
16212 
16213  switch (BuiltinID) {
16214  default: llvm_unreachable("Unsupported intrinsic!");
16215  case X86::BI__builtin_ia32_vp2intersect_q_512:
16216  ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16217  break;
16218  case X86::BI__builtin_ia32_vp2intersect_q_256:
16219  ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16220  break;
16221  case X86::BI__builtin_ia32_vp2intersect_q_128:
16222  ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16223  break;
16224  case X86::BI__builtin_ia32_vp2intersect_d_512:
16225  ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16226  break;
16227  case X86::BI__builtin_ia32_vp2intersect_d_256:
16228  ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16229  break;
16230  case X86::BI__builtin_ia32_vp2intersect_d_128:
16231  ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16232  break;
16233  }
16234 
16235  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16236  Value *Result = Builder.CreateExtractValue(Call, 0);
16237  Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16238  Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16239 
16240  Result = Builder.CreateExtractValue(Call, 1);
16241  Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16242  return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16243  }
16244 
16245  case X86::BI__builtin_ia32_vpmultishiftqb128:
16246  case X86::BI__builtin_ia32_vpmultishiftqb256:
16247  case X86::BI__builtin_ia32_vpmultishiftqb512: {
16248  Intrinsic::ID ID;
16249  switch (BuiltinID) {
16250  default: llvm_unreachable("Unsupported intrinsic!");
16251  case X86::BI__builtin_ia32_vpmultishiftqb128:
16252  ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16253  break;
16254  case X86::BI__builtin_ia32_vpmultishiftqb256:
16255  ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16256  break;
16257  case X86::BI__builtin_ia32_vpmultishiftqb512:
16258  ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16259  break;
16260  }
16261 
16262  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16263  }
16264 
16265  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16266  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16267  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16268  unsigned NumElts =
16269  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16270  Value *MaskIn = Ops[2];
16271  Ops.erase(&Ops[2]);
16272 
16273  Intrinsic::ID ID;
16274  switch (BuiltinID) {
16275  default: llvm_unreachable("Unsupported intrinsic!");
16276  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16277  ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16278  break;
16279  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16280  ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16281  break;
16282  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16283  ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16284  break;
16285  }
16286 
16287  Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16288  return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16289  }
16290 
16291  // packed comparison intrinsics
16292  case X86::BI__builtin_ia32_cmpeqps:
16293  case X86::BI__builtin_ia32_cmpeqpd:
16294  return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16295  case X86::BI__builtin_ia32_cmpltps:
16296  case X86::BI__builtin_ia32_cmpltpd:
16297  return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16298  case X86::BI__builtin_ia32_cmpleps:
16299  case X86::BI__builtin_ia32_cmplepd:
16300  return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16301  case X86::BI__builtin_ia32_cmpunordps:
16302  case X86::BI__builtin_ia32_cmpunordpd:
16303  return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16304  case X86::BI__builtin_ia32_cmpneqps:
16305  case X86::BI__builtin_ia32_cmpneqpd:
16306  return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16307  case X86::BI__builtin_ia32_cmpnltps:
16308  case X86::BI__builtin_ia32_cmpnltpd:
16309  return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16310  case X86::BI__builtin_ia32_cmpnleps:
16311  case X86::BI__builtin_ia32_cmpnlepd:
16312  return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16313  case X86::BI__builtin_ia32_cmpordps:
16314  case X86::BI__builtin_ia32_cmpordpd:
16315  return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16316  case X86::BI__builtin_ia32_cmpph128_mask:
16317  case X86::BI__builtin_ia32_cmpph256_mask:
16318  case X86::BI__builtin_ia32_cmpph512_mask:
16319  case X86::BI__builtin_ia32_cmpps128_mask:
16320  case X86::BI__builtin_ia32_cmpps256_mask:
16321  case X86::BI__builtin_ia32_cmpps512_mask:
16322  case X86::BI__builtin_ia32_cmppd128_mask:
16323  case X86::BI__builtin_ia32_cmppd256_mask:
16324  case X86::BI__builtin_ia32_cmppd512_mask:
16325  IsMaskFCmp = true;
16326  [[fallthrough]];
16327  case X86::BI__builtin_ia32_cmpps:
16328  case X86::BI__builtin_ia32_cmpps256:
16329  case X86::BI__builtin_ia32_cmppd:
16330  case X86::BI__builtin_ia32_cmppd256: {
16331  // Lower vector comparisons to fcmp instructions, while
16332  // ignoring the requested signaling behaviour and the
16333  // requested rounding mode.
16334  // This is only possible if fp-model is not strict and FENV_ACCESS is off.
16335 
16336  // The third argument is the comparison condition, an integer in the
16337  // range [0, 31].
16338  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16339 
16340  // Lower to an IR fcmp instruction, ignoring the requested signaling
16341  // behaviour; e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated to
16342  // FCMP_OGT.
16343  FCmpInst::Predicate Pred;
16344  bool IsSignaling;
16345  // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16346  // behavior is inverted. We'll handle that after the switch.
16347  switch (CC & 0xf) {
16348  case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16349  case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16350  case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16351  case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16352  case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16353  case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16354  case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16355  case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16356  case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16357  case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16358  case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16359  case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16360  case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16361  case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16362  case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16363  case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16364  default: llvm_unreachable("Unhandled CC");
16365  }
16366 
16367  // Invert the signalling behavior for 16-31.
16368  if (CC & 0x10)
16369  IsSignaling = !IsSignaling;
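// Worked example (illustrative): _CMP_LT_OQ is 0x11, so the low nibble
// selects FCMP_OLT with IsSignaling = true, and bit 4 then flips it to a
// quiet compare; _CMP_LT_OS (0x01) keeps the signaling form.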
16370 
16371  // If the predicate is true or false and we're using constrained intrinsics,
16372  // we don't have a compare intrinsic we can use. Just use the legacy X86
16373  // specific intrinsic.
16374  // If the intrinsic is mask enabled and we're using constrained intrinsics,
16375  // use the legacy X86 specific intrinsic.
16376  if (Builder.getIsFPConstrained() &&
16377  (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16378  IsMaskFCmp)) {
16379 
16380  Intrinsic::ID IID;
16381  switch (BuiltinID) {
16382  default: llvm_unreachable("Unexpected builtin");
16383  case X86::BI__builtin_ia32_cmpps:
16384  IID = Intrinsic::x86_sse_cmp_ps;
16385  break;
16386  case X86::BI__builtin_ia32_cmpps256:
16387  IID = Intrinsic::x86_avx_cmp_ps_256;
16388  break;
16389  case X86::BI__builtin_ia32_cmppd:
16390  IID = Intrinsic::x86_sse2_cmp_pd;
16391  break;
16392  case X86::BI__builtin_ia32_cmppd256:
16393  IID = Intrinsic::x86_avx_cmp_pd_256;
16394  break;
16395  case X86::BI__builtin_ia32_cmpph128_mask:
16396  IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16397  break;
16398  case X86::BI__builtin_ia32_cmpph256_mask:
16399  IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16400  break;
16401  case X86::BI__builtin_ia32_cmpph512_mask:
16402  IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16403  break;
16404  case X86::BI__builtin_ia32_cmpps512_mask:
16405  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16406  break;
16407  case X86::BI__builtin_ia32_cmppd512_mask:
16408  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16409  break;
16410  case X86::BI__builtin_ia32_cmpps128_mask:
16411  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16412  break;
16413  case X86::BI__builtin_ia32_cmpps256_mask:
16414  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16415  break;
16416  case X86::BI__builtin_ia32_cmppd128_mask:
16417  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16418  break;
16419  case X86::BI__builtin_ia32_cmppd256_mask:
16420  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16421  break;
16422  }
16423 
16424  Function *Intr = CGM.getIntrinsic(IID);
16425  if (IsMaskFCmp) {
16426  unsigned NumElts =
16427  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16428  Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16429  Value *Cmp = Builder.CreateCall(Intr, Ops);
16430  return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16431  }
16432 
16433  return Builder.CreateCall(Intr, Ops);
16434  }
16435 
16436  // Builtins without the _mask suffix return a vector of integers
16437  // of the same width as the input vectors
16438  if (IsMaskFCmp) {
16439  // We ignore SAE if strict FP is disabled. We only keep precise
16440  // exception behavior under strict FP.
16441  // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16442  // object will be required.
16443  unsigned NumElts =
16444  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16445  Value *Cmp;
16446  if (IsSignaling)
16447  Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16448  else
16449  Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16450  return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16451  }
16452 
16453  return getVectorFCmpIR(Pred, IsSignaling);
16454  }
16455 
16456  // SSE scalar comparison intrinsics
16457  case X86::BI__builtin_ia32_cmpeqss:
16458  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16459  case X86::BI__builtin_ia32_cmpltss:
16460  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16461  case X86::BI__builtin_ia32_cmpless:
16462  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16463  case X86::BI__builtin_ia32_cmpunordss:
16464  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16465  case X86::BI__builtin_ia32_cmpneqss:
16466  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16467  case X86::BI__builtin_ia32_cmpnltss:
16468  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16469  case X86::BI__builtin_ia32_cmpnless:
16470  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16471  case X86::BI__builtin_ia32_cmpordss:
16472  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16473  case X86::BI__builtin_ia32_cmpeqsd:
16474  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16475  case X86::BI__builtin_ia32_cmpltsd:
16476  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16477  case X86::BI__builtin_ia32_cmplesd:
16478  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16479  case X86::BI__builtin_ia32_cmpunordsd:
16480  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16481  case X86::BI__builtin_ia32_cmpneqsd:
16482  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16483  case X86::BI__builtin_ia32_cmpnltsd:
16484  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16485  case X86::BI__builtin_ia32_cmpnlesd:
16486  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16487  case X86::BI__builtin_ia32_cmpordsd:
16488  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
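// Summary of the immediates above (illustrative; mirrors the packed cases):
//   0 = EQ (OEQ)   1 = LT (OLT)   2 = LE (OLE)   3 = UNORD (UNO)
//   4 = NEQ (UNE)  5 = NLT (UGE)  6 = NLE (UGT)  7 = ORD (ORD)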
16489 
16490  // f16c half2float intrinsics
16491  case X86::BI__builtin_ia32_vcvtph2ps:
16492  case X86::BI__builtin_ia32_vcvtph2ps256:
16493  case X86::BI__builtin_ia32_vcvtph2ps_mask:
16494  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16495  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16496  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16497  return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16498  }
16499 
16500  // AVX512 bf16 intrinsics
16501  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16502  Ops[2] = getMaskVecValue(
16503  *this, Ops[2],
16504  cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16505  Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16506  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16507  }
16508  case X86::BI__builtin_ia32_cvtsbf162ss_32:
16509  return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16510 
16511  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16512  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16513  Intrinsic::ID IID;
16514  switch (BuiltinID) {
16515  default: llvm_unreachable("Unsupported intrinsic!");
16516  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16517  IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16518  break;
16519  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16520  IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16521  break;
16522  }
16523  Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16524  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16525  }
16526 
16527  case X86::BI__cpuid:
16528  case X86::BI__cpuidex: {
16529  Value *FuncId = EmitScalarExpr(E->getArg(1));
16530  Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16531  ? EmitScalarExpr(E->getArg(2))
16532  : llvm::ConstantInt::get(Int32Ty, 0);
16533 
16534  llvm::StructType *CpuidRetTy =
16535  llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16536  llvm::FunctionType *FTy =
16537  llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16538 
16539  StringRef Asm, Constraints;
16540  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16541  Asm = "cpuid";
16542  Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16543  } else {
16544  // x86-64 uses %rbx as the base register, so preserve it.
16545  Asm = "xchgq %rbx, ${1:q}\n"
16546  "cpuid\n"
16547  "xchgq %rbx, ${1:q}";
16548  Constraints = "={ax},=r,={cx},={dx},0,2";
16549  }
16550 
16551  llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16552  /*hasSideEffects=*/false);
16553  Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16554  Value *BasePtr = EmitScalarExpr(E->getArg(0));
16555  Value *Store = nullptr;
16556  for (unsigned i = 0; i < 4; i++) {
16557  Value *Extracted = Builder.CreateExtractValue(IACall, i);
16558  Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16559  Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16560  }
16561 
16562  // Return the last store instruction to signal that we have emitted
16563  // the intrinsic.
16564  return Store;
16565  }
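// Usage sketch (hypothetical caller, not from this file):
//   int Info[4];
//   __cpuidex(Info, 7, 0); // leaf 7, subleaf 0
// The loop above stores EAX, EBX, ECX and EDX into Info[0..3] in order.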
16566 
16567  case X86::BI__emul:
16568  case X86::BI__emulu: {
16569  llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16570  bool isSigned = (BuiltinID == X86::BI__emul);
16571  Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16572  Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16573  return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16574  }
16575  case X86::BI__mulh:
16576  case X86::BI__umulh:
16577  case X86::BI_mul128:
16578  case X86::BI_umul128: {
16579  llvm::Type *ResType = ConvertType(E->getType());
16580  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16581 
16582  bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16583  Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16584  Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16585 
16586  Value *MulResult, *HigherBits;
16587  if (IsSigned) {
16588  MulResult = Builder.CreateNSWMul(LHS, RHS);
16589  HigherBits = Builder.CreateAShr(MulResult, 64);
16590  } else {
16591  MulResult = Builder.CreateNUWMul(LHS, RHS);
16592  HigherBits = Builder.CreateLShr(MulResult, 64);
16593  }
16594  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16595 
16596  if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16597  return HigherBits;
16598 
16599  Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16600  Builder.CreateStore(HigherBits, HighBitsAddress);
16601  return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16602  }
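// Worked example (illustrative): __umulh(a, b) widens both operands to
// i128 and keeps only the high half:
//   %m = mul nuw i128 %lhs, %rhs
//   %h = lshr i128 %m, 64
// _umul128 additionally stores the high half through its third argument
// and returns the low 64 bits of %m.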
16603 
16604  case X86::BI__faststorefence: {
16605  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16606  llvm::SyncScope::System);
16607  }
16608  case X86::BI__shiftleft128:
16609  case X86::BI__shiftright128: {
16610  llvm::Function *F = CGM.getIntrinsic(
16611  BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16612  Int64Ty);
16613  // Flip low/high ops and zero-extend amount to matching type.
16614  // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16615  // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16616  std::swap(Ops[0], Ops[1]);
16617  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16618  return Builder.CreateCall(F, Ops);
16619  }
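// A minimal sketch (illustrative): __shiftleft128(Lo, Hi, 5) becomes
//   call i64 @llvm.fshl.i64(i64 %Hi, i64 %Lo, i64 5)
// i.e. the high 64 bits of the 128-bit value (Hi:Lo) shifted left by 5.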
16620  case X86::BI_ReadWriteBarrier:
16621  case X86::BI_ReadBarrier:
16622  case X86::BI_WriteBarrier: {
16623  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16624  llvm::SyncScope::SingleThread);
16625  }
16626 
16627  case X86::BI_AddressOfReturnAddress: {
16628  Function *F =
16629  CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16630  return Builder.CreateCall(F);
16631  }
16632  case X86::BI__stosb: {
16633  // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16634  // instruction, but it will create a memset that won't be optimized away.
16635  return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16636  }
16637  case X86::BI__ud2:
16638  // llvm.trap makes a ud2a instruction on x86.
16639  return EmitTrapCall(Intrinsic::trap);
16640  case X86::BI__int2c: {
16641  // This syscall signals a driver assertion failure in x86 NT kernels.
16642  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16643  llvm::InlineAsm *IA =
16644  llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16645  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16646  getLLVMContext(), llvm::AttributeList::FunctionIndex,
16647  llvm::Attribute::NoReturn);
16648  llvm::CallInst *CI = Builder.CreateCall(IA);
16649  CI->setAttributes(NoReturnAttr);
16650  return CI;
16651  }
16652  case X86::BI__readfsbyte:
16653  case X86::BI__readfsword:
16654  case X86::BI__readfsdword:
16655  case X86::BI__readfsqword: {
16656  llvm::Type *IntTy = ConvertType(E->getType());
16657  Value *Ptr = Builder.CreateIntToPtr(
16658  Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16659  LoadInst *Load = Builder.CreateAlignedLoad(
16660  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16661  Load->setVolatile(true);
16662  return Load;
16663  }
16664  case X86::BI__readgsbyte:
16665  case X86::BI__readgsword:
16666  case X86::BI__readgsdword:
16667  case X86::BI__readgsqword: {
16668  llvm::Type *IntTy = ConvertType(E->getType());
16669  Value *Ptr = Builder.CreateIntToPtr(
16670  Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16671  LoadInst *Load = Builder.CreateAlignedLoad(
16672  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16673  Load->setVolatile(true);
16674  return Load;
16675  }
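// Illustrative note: address spaces 257 and 256 are the x86 conventions for
// FS- and GS-relative addressing, so e.g. __readgsqword(0x60) becomes a
// volatile i64 load from gs:[0x60]. Volatility is required because
// segment-relative data can change behind the optimizer's back.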
16676  case X86::BI__builtin_ia32_encodekey128_u32: {
16677  Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16678 
16679  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16680 
16681  for (int i = 0; i < 3; ++i) {
16682  Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16683  Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16684  Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16685  }
16686 
16687  return Builder.CreateExtractValue(Call, 0);
16688  }
16689  case X86::BI__builtin_ia32_encodekey256_u32: {
16690  Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16691 
16692  Value *Call =
16693  Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16694 
16695  for (int i = 0; i < 4; ++i) {
16696  Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16697  Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16698  Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16699  }
16700 
16701  return Builder.CreateExtractValue(Call, 0);
16702  }
16703  case X86::BI__builtin_ia32_aesenc128kl_u8:
16704  case X86::BI__builtin_ia32_aesdec128kl_u8:
16705  case X86::BI__builtin_ia32_aesenc256kl_u8:
16706  case X86::BI__builtin_ia32_aesdec256kl_u8: {
16707  Intrinsic::ID IID;
16708  StringRef BlockName;
16709  switch (BuiltinID) {
16710  default:
16711  llvm_unreachable("Unexpected builtin");
16712  case X86::BI__builtin_ia32_aesenc128kl_u8:
16713  IID = Intrinsic::x86_aesenc128kl;
16714  BlockName = "aesenc128kl";
16715  break;
16716  case X86::BI__builtin_ia32_aesdec128kl_u8:
16717  IID = Intrinsic::x86_aesdec128kl;
16718  BlockName = "aesdec128kl";
16719  break;
16720  case X86::BI__builtin_ia32_aesenc256kl_u8:
16721  IID = Intrinsic::x86_aesenc256kl;
16722  BlockName = "aesenc256kl";
16723  break;
16724  case X86::BI__builtin_ia32_aesdec256kl_u8:
16725  IID = Intrinsic::x86_aesdec256kl;
16726  BlockName = "aesdec256kl";
16727  break;
16728  }
16729 
16730  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16731 
16732  BasicBlock *NoError =
16733  createBasicBlock(BlockName + "_no_error", this->CurFn);
16734  BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16735  BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16736 
16737  Value *Ret = Builder.CreateExtractValue(Call, 0);
16738  Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16739  Value *Out = Builder.CreateExtractValue(Call, 1);
16740  Builder.CreateCondBr(Succ, NoError, Error);
16741 
16742  Builder.SetInsertPoint(NoError);
16743  Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16744  Builder.CreateBr(End);
16745 
16746  Builder.SetInsertPoint(Error);
16747  Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16748  Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16749  Builder.CreateBr(End);
16750 
16751  Builder.SetInsertPoint(End);
16752  return Builder.CreateExtractValue(Call, 0);
16753  }
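// Control-flow sketch (illustrative): the low bit of the intrinsic's first
// result picks "<name>_no_error", which stores the processed block, or
// "<name>_error", which stores zero; both branch to "<name>_end", and the
// status bit itself is returned either way.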
16754  case X86::BI__builtin_ia32_aesencwide128kl_u8:
16755  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16756  case X86::BI__builtin_ia32_aesencwide256kl_u8:
16757  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16758  Intrinsic::ID IID;
16759  StringRef BlockName;
16760  switch (BuiltinID) {
16761  case X86::BI__builtin_ia32_aesencwide128kl_u8:
16762  IID = Intrinsic::x86_aesencwide128kl;
16763  BlockName = "aesencwide128kl";
16764  break;
16765  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16766  IID = Intrinsic::x86_aesdecwide128kl;
16767  BlockName = "aesdecwide128kl";
16768  break;
16769  case X86::BI__builtin_ia32_aesencwide256kl_u8:
16770  IID = Intrinsic::x86_aesencwide256kl;
16771  BlockName = "aesencwide256kl";
16772  break;
16773  case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16774  IID = Intrinsic::x86_aesdecwide256kl;
16775  BlockName = "aesdecwide256kl";
16776  break;
16777  }
16778 
16779  llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16780  Value *InOps[9];
16781  InOps[0] = Ops[2];
16782  for (int i = 0; i != 8; ++i) {
16783  Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16784  InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16785  }
16786 
16787  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16788 
16789  BasicBlock *NoError =
16790  createBasicBlock(BlockName + "_no_error", this->CurFn);
16791  BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16792  BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16793 
16794  Value *Ret = Builder.CreateExtractValue(Call, 0);
16795  Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16796  Builder.CreateCondBr(Succ, NoError, Error);
16797 
16798  Builder.SetInsertPoint(NoError);
16799  for (int i = 0; i != 8; ++i) {
16800  Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16801  Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16802  Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16803  }
16804  Builder.CreateBr(End);
16805 
16806  Builder.SetInsertPoint(Error);
16807  for (int i = 0; i != 8; ++i) {
16808  Value *Out = Builder.CreateExtractValue(Call, i + 1);
16809  Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16810  Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16811  Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16812  }
16813  Builder.CreateBr(End);
16814 
16815  Builder.SetInsertPoint(End);
16816  return Builder.CreateExtractValue(Call, 0);
16817  }
16818  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16819  IsConjFMA = true;
16820  [[fallthrough]];
16821  case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16822  Intrinsic::ID IID = IsConjFMA
16823  ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16824  : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16825  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16826  return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16827  }
16828  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16829  IsConjFMA = true;
16830  [[fallthrough]];
16831  case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16832  Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16833  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16834  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16835  Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16836  return EmitX86Select(*this, And, Call, Ops[0]);
16837  }
16838  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16839  IsConjFMA = true;
16840  [[fallthrough]];
16841  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16842  Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16843  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16844  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16845  static constexpr int Mask[] = {0, 5, 6, 7};
16846  return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16847  }
16848  case X86::BI__builtin_ia32_prefetchi:
16849  return Builder.CreateCall(
16850  CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16851  {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16852  llvm::ConstantInt::get(Int32Ty, 0)});
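// Illustrative note: the extra operands follow llvm.prefetch's
// (rw, locality, cache-type) order, so rw == 0 (read) and cache-type == 0
// select an instruction-cache prefetch, as prefetchi requires.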
16853  }
16854 }
16855 
16856 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16857  const CallExpr *E) {
16858  // Do not emit the builtin arguments in the arguments of a function call,
16859  // because the evaluation order of function arguments is not specified in C++.
16860  // This is important when testing to ensure the arguments are emitted in the
16861  // same order every time. E.g.:
16862  // Instead of:
16863  // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16864  // EmitScalarExpr(E->getArg(1)), "swdiv");
16865  // Use:
16866  // Value *Op0 = EmitScalarExpr(E->getArg(0));
16867  // Value *Op1 = EmitScalarExpr(E->getArg(1));
16868  // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16869 
16870  Intrinsic::ID ID = Intrinsic::not_intrinsic;
16871 
16872 #include "llvm/TargetParser/PPCTargetParser.def"
16873  auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16874  unsigned CompOp,
16875  unsigned OpValue) -> Value * {
16876  if (SupportMethod == AIX_BUILTIN_PPC_FALSE)
16877  return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16878 
16879  if (SupportMethod == AIX_BUILTIN_PPC_TRUE)
16880  return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16881 
16882  assert(SupportMethod <= USE_SYS_CONF && "Invalid value for SupportMethod.");
16883  assert((CompOp == COMP_EQ) && "Only equal comparisons are supported.");
16884 
16885  llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16886  llvm::Constant *SysConf =
16887  CGM.CreateRuntimeVariable(STy, "_system_configuration");
16888 
16889  // Grab the appropriate field from _system_configuration.
16890  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16891  ConstantInt::get(Int32Ty, FieldIdx)};
16892 
16893  llvm::Value *FieldValue = Builder.CreateGEP(STy, SysConf, Idxs);
16894  FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16895  CharUnits::fromQuantity(4));
16896  assert(FieldValue->getType()->isIntegerTy(32) &&
16897  "Only 32-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16898  return Builder.CreateICmp(ICmpInst::ICMP_EQ, FieldValue,
16899  ConstantInt::get(Int32Ty, OpValue));
16900  };
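// Sketch (illustrative): on AIX, a query such as __builtin_cpu_is("power9")
// therefore lowers to loading one 32-bit field of the kernel-exported
// _system_configuration structure and comparing it for equality with the
// CPU id constant taken from PPCTargetParser.def.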
16901 
16902  switch (BuiltinID) {
16903  default: return nullptr;
16904 
16905  case Builtin::BI__builtin_cpu_is: {
16906  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16907  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16908  llvm::Triple Triple = getTarget().getTriple();
16909 
16910  if (Triple.isOSAIX()) {
16911  unsigned IsCpuSupport, FieldIdx, CompareOp, CpuIdValue;
16912  typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUType;
16913  std::tie(IsCpuSupport, FieldIdx, CompareOp, CpuIdValue) =
16914  static_cast<CPUType>(StringSwitch<CPUType>(CPUStr)
16915 #define PPC_AIX_CPU(NAME, SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE) \
16916  .Case(NAME, {SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE})
16917 #include "llvm/TargetParser/PPCTargetParser.def"
16918  );
16919  return GenAIXPPCBuiltinCpuExpr(IsCpuSupport, FieldIdx, CompareOp,
16920  CpuIdValue);
16921  }
16922 
16923  assert(Triple.isOSLinux() &&
16924  "__builtin_cpu_is() is only supported for AIX and Linux.");
16925  unsigned NumCPUID = StringSwitch<unsigned>(CPUStr)
16926 #define PPC_LNX_CPU(Name, NumericID) .Case(Name, NumericID)
16927 #include "llvm/TargetParser/PPCTargetParser.def"
16928  .Default(-1U);
16929  assert(NumCPUID < -1U && "Invalid CPU name. Missed by SemaChecking?");
16930  Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16931  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16932  Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16933  return Builder.CreateICmpEQ(TheCall,
16934  llvm::ConstantInt::get(Int32Ty, NumCPUID));
16935  }
16936  case Builtin::BI__builtin_cpu_supports: {
16937  unsigned FeatureWord;
16938  unsigned BitMask;
16939  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16940  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16941  std::tie(FeatureWord, BitMask) =
16942  StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16943 #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16944  .Case(Name, {FA_WORD, Bitmask})
16945 #include "llvm/TargetParser/PPCTargetParser.def"
16946  .Default({0, 0});
16947  if (!BitMask)
16948  return Builder.getFalse();
16949  Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
16950  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16951  Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
16952  Value *Mask =
16953  Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
16954  return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
16955 #undef PPC_FAWORD_HWCAP
16956 #undef PPC_FAWORD_HWCAP2
16957 #undef PPC_FAWORD_CPUID
16958  }
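// Sketch of the emitted check (illustrative): for a feature in hwcap word W
// with bitmask M this is roughly
//   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 W)
//   %m = and i32 %w, M
//   %r = icmp ne i32 %m, 0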
16959 
16960  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16961  // call __builtin_readcyclecounter.
16962  case PPC::BI__builtin_ppc_get_timebase:
16963  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16964 
16965  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16966  case PPC::BI__builtin_altivec_lvx:
16967  case PPC::BI__builtin_altivec_lvxl:
16968  case PPC::BI__builtin_altivec_lvebx:
16969  case PPC::BI__builtin_altivec_lvehx:
16970  case PPC::BI__builtin_altivec_lvewx:
16971  case PPC::BI__builtin_altivec_lvsl:
16972  case PPC::BI__builtin_altivec_lvsr:
16973  case PPC::BI__builtin_vsx_lxvd2x:
16974  case PPC::BI__builtin_vsx_lxvw4x:
16975  case PPC::BI__builtin_vsx_lxvd2x_be:
16976  case PPC::BI__builtin_vsx_lxvw4x_be:
16977  case PPC::BI__builtin_vsx_lxvl:
16978  case PPC::BI__builtin_vsx_lxvll:
16979  {
16980  SmallVector<Value *, 2> Ops;
16981  Ops.push_back(EmitScalarExpr(E->getArg(0)));
16982  Ops.push_back(EmitScalarExpr(E->getArg(1)));
16983  if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16984  BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16985  Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16986  Ops.pop_back();
16987  }
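// Illustrative note: for the lvx-style builtins the first argument is a
// byte offset and the second the base pointer; the GEP above folds them
// into the single address operand the target intrinsic expects.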
16988 
16989  switch (BuiltinID) {
16990  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16991  case PPC::BI__builtin_altivec_lvx:
16992  ID = Intrinsic::ppc_altivec_lvx;
16993  break;
16994  case PPC::BI__builtin_altivec_lvxl:
16995  ID = Intrinsic::ppc_altivec_lvxl;
16996  break;
16997  case PPC::BI__builtin_altivec_lvebx:
16998  ID = Intrinsic::ppc_altivec_lvebx;
16999  break;
17000  case PPC::BI__builtin_altivec_lvehx:
17001  ID = Intrinsic::ppc_altivec_lvehx;
17002  break;
17003  case PPC::BI__builtin_altivec_lvewx:
17004  ID = Intrinsic::ppc_altivec_lvewx;
17005  break;
17006  case PPC::BI__builtin_altivec_lvsl:
17007  ID = Intrinsic::ppc_altivec_lvsl;
17008  break;
17009  case PPC::BI__builtin_altivec_lvsr:
17010  ID = Intrinsic::ppc_altivec_lvsr;
17011  break;
17012  case PPC::BI__builtin_vsx_lxvd2x:
17013  ID = Intrinsic::ppc_vsx_lxvd2x;
17014  break;
17015  case PPC::BI__builtin_vsx_lxvw4x:
17016  ID = Intrinsic::ppc_vsx_lxvw4x;
17017  break;
17018  case PPC::BI__builtin_vsx_lxvd2x_be:
17019  ID = Intrinsic::ppc_vsx_lxvd2x_be;
17020  break;
17021  case PPC::BI__builtin_vsx_lxvw4x_be:
17022  ID = Intrinsic::ppc_vsx_lxvw4x_be;
17023  break;
17024  case PPC::BI__builtin_vsx_lxvl:
17025  ID = Intrinsic::ppc_vsx_lxvl;
17026  break;
17027  case PPC::BI__builtin_vsx_lxvll:
17028  ID = Intrinsic::ppc_vsx_lxvll;
17029  break;
17030  }
17031  llvm::Function *F = CGM.getIntrinsic(ID);
17032  return Builder.CreateCall(F, Ops, "");
17033  }
17034 
17035  // vec_st, vec_xst_be
17036  case PPC::BI__builtin_altivec_stvx:
17037  case PPC::BI__builtin_altivec_stvxl:
17038  case PPC::BI__builtin_altivec_stvebx:
17039  case PPC::BI__builtin_altivec_stvehx:
17040  case PPC::BI__builtin_altivec_stvewx:
17041  case PPC::BI__builtin_vsx_stxvd2x:
17042  case PPC::BI__builtin_vsx_stxvw4x:
17043  case PPC::BI__builtin_vsx_stxvd2x_be:
17044  case PPC::BI__builtin_vsx_stxvw4x_be:
17045  case PPC::BI__builtin_vsx_stxvl:
17046  case PPC::BI__builtin_vsx_stxvll:
17047  {
17048  SmallVector<Value *, 3> Ops;
17049  Ops.push_back(EmitScalarExpr(E->getArg(0)));
17050  Ops.push_back(EmitScalarExpr(E->getArg(1)));
17051  Ops.push_back(EmitScalarExpr(E->getArg(2)));
17052  if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17053  BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17054  Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17055  Ops.pop_back();
17056  }
17057 
17058  switch (BuiltinID) {
17059  default: llvm_unreachable("Unsupported st intrinsic!");
17060  case PPC::BI__builtin_altivec_stvx:
17061  ID = Intrinsic::ppc_altivec_stvx;
17062  break;
17063  case PPC::BI__builtin_altivec_stvxl:
17064  ID = Intrinsic::ppc_altivec_stvxl;
17065  break;
17066  case PPC::BI__builtin_altivec_stvebx:
17067  ID = Intrinsic::ppc_altivec_stvebx;
17068  break;
17069  case PPC::BI__builtin_altivec_stvehx:
17070  ID = Intrinsic::ppc_altivec_stvehx;
17071  break;
17072  case PPC::BI__builtin_altivec_stvewx:
17073  ID = Intrinsic::ppc_altivec_stvewx;
17074  break;
17075  case PPC::BI__builtin_vsx_stxvd2x:
17076  ID = Intrinsic::ppc_vsx_stxvd2x;
17077  break;
17078  case PPC::BI__builtin_vsx_stxvw4x:
17079  ID = Intrinsic::ppc_vsx_stxvw4x;
17080  break;
17081  case PPC::BI__builtin_vsx_stxvd2x_be:
17082  ID = Intrinsic::ppc_vsx_stxvd2x_be;
17083  break;
17084  case PPC::BI__builtin_vsx_stxvw4x_be:
17085  ID = Intrinsic::ppc_vsx_stxvw4x_be;
17086  break;
17087  case PPC::BI__builtin_vsx_stxvl:
17088  ID = Intrinsic::ppc_vsx_stxvl;
17089  break;
17090  case PPC::BI__builtin_vsx_stxvll:
17091  ID = Intrinsic::ppc_vsx_stxvll;
17092  break;
17093  }
17094  llvm::Function *F = CGM.getIntrinsic(ID);
17095  return Builder.CreateCall(F, Ops, "");
17096  }
17097  case PPC::BI__builtin_vsx_ldrmb: {
17098  // This essentially boils down to performing an unaligned VMX load
17099  // sequence that avoids crossing a page boundary, then shuffling the
17100  // elements into the right side of the vector register.
17101  Value *Op0 = EmitScalarExpr(E->getArg(0));
17102  Value *Op1 = EmitScalarExpr(E->getArg(1));
17103  int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17104  llvm::Type *ResTy = ConvertType(E->getType());
17105  bool IsLE = getTarget().isLittleEndian();
17106 
17107  // If the user wants the entire vector, just load the entire vector.
17108  if (NumBytes == 16) {
17109  Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo());
17110  Value *LD =
17111  Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
17112  if (!IsLE)
17113  return LD;
17114 
17115  // Reverse the bytes on LE.
17116  SmallVector<int, 16> RevMask;
17117  for (int Idx = 0; Idx < 16; Idx++)
17118  RevMask.push_back(15 - Idx);
17119  return Builder.CreateShuffleVector(LD, LD, RevMask);
17120  }
17121 
17122  llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17123  llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17124  : Intrinsic::ppc_altivec_lvsl);
17125  llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17126  Value *HiMem = Builder.CreateGEP(
17127  Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17128  Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17129  Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17130  Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17131 
17132  Op0 = IsLE ? HiLd : LoLd;
17133  Op1 = IsLE ? LoLd : HiLd;
17134  Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17135  Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17136 
17137  if (IsLE) {
17138  SmallVector<int, 16> Consts;
17139  for (int Idx = 0; Idx < 16; Idx++) {
17140  int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17141  : 16 - (NumBytes - Idx);
17142  Consts.push_back(Val);
17143  }
17144  return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17145  Zero, Consts);
17146  }
17147  SmallVector<Constant *, 16> Consts;
17148  for (int Idx = 0; Idx < 16; Idx++)
17149  Consts.push_back(Builder.getInt8(NumBytes + Idx));
17150  Value *Mask2 = ConstantVector::get(Consts);
17151  return Builder.CreateBitCast(
17152  Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17153  }
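// Sketch of the general path above (illustrative): two lvx loads bracket
// the NumBytes region without crossing a page boundary, lvsl/lvsr supplies
// the permute control, vperm stitches the two loads together, and a final
// shuffle right-justifies the NumBytes bytes in the result vector.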
17154  case PPC::BI__builtin_vsx_strmb: {
17155  Value *Op0 = EmitScalarExpr(E->getArg(0));
17156  Value *Op1 = EmitScalarExpr(E->getArg(1));
17157  Value *Op2 = EmitScalarExpr(E->getArg(2));
17158  int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17159  bool IsLE = getTarget().isLittleEndian();
17160  auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17161  // If storing the whole vector, simply store it on BE; on LE, reverse
17162  // the bytes first and then store.
17163  if (Width == 16) {
17164  Value *StVec = Op2;
17165  if (IsLE) {
17166  SmallVector<int, 16> RevMask;
17167  for (int Idx = 0; Idx < 16; Idx++)
17168  RevMask.push_back(15 - Idx);
17169  StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17170  }
17171  return Builder.CreateStore(
17172  StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17173  }
17174  auto *ConvTy = Int64Ty;
17175  unsigned NumElts = 0;
17176  switch (Width) {
17177  default:
17178  llvm_unreachable("width for stores must be a power of 2");
17179  case 8:
17180  ConvTy = Int64Ty;
17181  NumElts = 2;
17182  break;
17183  case 4:
17184  ConvTy = Int32Ty;
17185  NumElts = 4;
17186  break;
17187  case 2:
17188  ConvTy = Int16Ty;
17189  NumElts = 8;
17190  break;
17191  case 1:
17192  ConvTy = Int8Ty;
17193  NumElts = 16;
17194  break;
17195  }
17196  Value *Vec = Builder.CreateBitCast(
17197  Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17198  Value *Ptr =
17199  Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17200  Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17201  if (IsLE && Width > 1) {
17202  Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17203  Elt = Builder.CreateCall(F, Elt);
17204  }
17205  return Builder.CreateStore(
17206  Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17207  };
17208  unsigned Stored = 0;
17209  unsigned RemainingBytes = NumBytes;
17210  Value *Result;
17211  if (NumBytes == 16)
17212  return StoreSubVec(16, 0, 0);
17213  if (NumBytes >= 8) {
17214  Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17215  RemainingBytes -= 8;
17216  Stored += 8;
17217  }
17218  if (RemainingBytes >= 4) {
17219  Result = StoreSubVec(4, NumBytes - Stored - 4,
17220  IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17221  RemainingBytes -= 4;
17222  Stored += 4;
17223  }
17224  if (RemainingBytes >= 2) {
17225  Result = StoreSubVec(2, NumBytes - Stored - 2,
17226  IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17227  RemainingBytes -= 2;
17228  Stored += 2;
17229  }
17230  if (RemainingBytes)
17231  Result =
17232  StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17233  return Result;
17234  }
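// Worked example (illustrative): NumBytes == 11 decomposes into an 8-byte,
// a 2-byte and a 1-byte store (8 + 2 + 1), each extracting the next
// unstored element from the bitcast vector and byte-swapping it on LE.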
17235  // Square root
17236  case PPC::BI__builtin_vsx_xvsqrtsp:
17237  case PPC::BI__builtin_vsx_xvsqrtdp: {
17238  llvm::Type *ResultType = ConvertType(E->getType());
17239  Value *X = EmitScalarExpr(E->getArg(0));
17240  if (Builder.getIsFPConstrained()) {
17241  llvm::Function *F = CGM.getIntrinsic(
17242  Intrinsic::experimental_constrained_sqrt, ResultType);
17243  return Builder.CreateConstrainedFPCall(F, X);
17244  } else {
17245  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17246  return Builder.CreateCall(F, X);
17247  }
17248  }
17249  // Count leading zeros
17250  case PPC::BI__builtin_altivec_vclzb:
17251  case PPC::BI__builtin_altivec_vclzh:
17252  case PPC::BI__builtin_altivec_vclzw:
17253  case PPC::BI__builtin_altivec_vclzd: {
17254  llvm::Type *ResultType = ConvertType(E->getType());
17255  Value *X = EmitScalarExpr(E->getArg(0));
17256  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17257  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17258  return Builder.CreateCall(F, {X, Undef});
17259  }
17260  case PPC::BI__builtin_altivec_vctzb:
17261  case PPC::BI__builtin_altivec_vctzh:
17262  case PPC::BI__builtin_altivec_vctzw:
17263  case PPC::BI__builtin_altivec_vctzd: {
17264  llvm::Type *ResultType = ConvertType(E->getType());
17265  Value *X = EmitScalarExpr(E->getArg(0));
17266  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17267  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17268  return Builder.CreateCall(F, {X, Undef});
17269  }
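 // Both count families lower to the generic LLVM bit-counting intrinsics
 // with the is_zero_poison flag cleared; e.g. vclzw on vector unsigned int
 // becomes:
 //   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)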
17270  case PPC::BI__builtin_altivec_vinsd:
17271  case PPC::BI__builtin_altivec_vinsw:
17272  case PPC::BI__builtin_altivec_vinsd_elt:
17273  case PPC::BI__builtin_altivec_vinsw_elt: {
17274  llvm::Type *ResultType = ConvertType(E->getType());
17275  Value *Op0 = EmitScalarExpr(E->getArg(0));
17276  Value *Op1 = EmitScalarExpr(E->getArg(1));
17277  Value *Op2 = EmitScalarExpr(E->getArg(2));
17278 
17279  bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17280  BuiltinID == PPC::BI__builtin_altivec_vinsd);
17281 
17282  bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17283  BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17284 
17285  // The third argument must be a compile-time constant.
17286  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17287  assert(ArgCI &&
17288  "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17289 
17290  // The valid range for the third argument depends on the input type and
17291  // the builtin called.
17292  int ValidMaxValue = 0;
17293  if (IsUnaligned)
17294  ValidMaxValue = (Is32bit) ? 12 : 8;
17295  else
17296  ValidMaxValue = (Is32bit) ? 3 : 1;
17297 
17298  // Get value of third argument.
17299  int64_t ConstArg = ArgCI->getSExtValue();
17300 
17301  // Compose range checking error message.
17302  std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17303  RangeErrMsg += " number " + llvm::to_string(ConstArg);
17304  RangeErrMsg += " is outside of the valid range [0, ";
17305  RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17306 
17307  // Issue error if third argument is not within the valid range.
17308  if (ConstArg < 0 || ConstArg > ValidMaxValue)
17309  CGM.Error(E->getExprLoc(), RangeErrMsg);
17310 
17311  // Input to vec_replace_elt is an element index; convert it to a byte index.
17312  if (!IsUnaligned) {
17313  ConstArg *= Is32bit ? 4 : 8;
17314  // Fix the constant according to endianness.
17315  if (getTarget().isLittleEndian())
17316  ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17317  }
17318 
17319  ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17320  Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17321  // Casting input to vector int as per intrinsic definition.
17322  Op0 =
17323  Is32bit
17324  ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17325  : Builder.CreateBitCast(Op0,
17326  llvm::FixedVectorType::get(Int64Ty, 2));
17327  return Builder.CreateBitCast(
17328  Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17329  }
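 // For example, vec_replace_elt on a vector unsigned int with element index
 // 1 is converted above to byte index 4, which on a little-endian target is
 // then flipped to 12 - 4 = 8, the offset of the same element counted from
 // the other end of the 16-byte register.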
17330  case PPC::BI__builtin_altivec_vpopcntb:
17331  case PPC::BI__builtin_altivec_vpopcnth:
17332  case PPC::BI__builtin_altivec_vpopcntw:
17333  case PPC::BI__builtin_altivec_vpopcntd: {
17334  llvm::Type *ResultType = ConvertType(E->getType());
17335  Value *X = EmitScalarExpr(E->getArg(0));
17336  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17337  return Builder.CreateCall(F, X);
17338  }
17339  case PPC::BI__builtin_altivec_vadduqm:
17340  case PPC::BI__builtin_altivec_vsubuqm: {
17341  Value *Op0 = EmitScalarExpr(E->getArg(0));
17342  Value *Op1 = EmitScalarExpr(E->getArg(1));
17343  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17344  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17345  Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17346  if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17347  return Builder.CreateAdd(Op0, Op1, "vadduqm");
17348  else
17349  return Builder.CreateSub(Op0, Op1, "vsubuqm");
17350  }
17351  case PPC::BI__builtin_altivec_vaddcuq_c:
17352  case PPC::BI__builtin_altivec_vsubcuq_c: {
17353  SmallVector<Value *, 4> Ops;
17354  Value *Op0 = EmitScalarExpr(E->getArg(0));
17355  Value *Op1 = EmitScalarExpr(E->getArg(1));
17356  llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17357  llvm::IntegerType::get(getLLVMContext(), 128), 1);
17358  Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17359  Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17360  ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17361  ? Intrinsic::ppc_altivec_vaddcuq
17362  : Intrinsic::ppc_altivec_vsubcuq;
17363  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17364  }
17365  case PPC::BI__builtin_altivec_vaddeuqm_c:
17366  case PPC::BI__builtin_altivec_vaddecuq_c:
17367  case PPC::BI__builtin_altivec_vsubeuqm_c:
17368  case PPC::BI__builtin_altivec_vsubecuq_c: {
17369  SmallVector<Value *, 4> Ops;
17370  Value *Op0 = EmitScalarExpr(E->getArg(0));
17371  Value *Op1 = EmitScalarExpr(E->getArg(1));
17372  Value *Op2 = EmitScalarExpr(E->getArg(2));
17373  llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17374  llvm::IntegerType::get(getLLVMContext(), 128), 1);
17375  Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17376  Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17377  Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17378  switch (BuiltinID) {
17379  default:
17380  llvm_unreachable("Unsupported intrinsic!");
17381  case PPC::BI__builtin_altivec_vaddeuqm_c:
17382  ID = Intrinsic::ppc_altivec_vaddeuqm;
17383  break;
17384  case PPC::BI__builtin_altivec_vaddecuq_c:
17385  ID = Intrinsic::ppc_altivec_vaddecuq;
17386  break;
17387  case PPC::BI__builtin_altivec_vsubeuqm_c:
17388  ID = Intrinsic::ppc_altivec_vsubeuqm;
17389  break;
17390  case PPC::BI__builtin_altivec_vsubecuq_c:
17391  ID = Intrinsic::ppc_altivec_vsubecuq;
17392  break;
17393  }
17394  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17395  }
17396  case PPC::BI__builtin_ppc_rldimi:
17397  case PPC::BI__builtin_ppc_rlwimi: {
17398  Value *Op0 = EmitScalarExpr(E->getArg(0));
17399  Value *Op1 = EmitScalarExpr(E->getArg(1));
17400  Value *Op2 = EmitScalarExpr(E->getArg(2));
17401  Value *Op3 = EmitScalarExpr(E->getArg(3));
17402  // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17403  // before isel to leverage peepholes and avoid legalization effort.
17404  if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17405  !getTarget().getTriple().isPPC64()) {
17406  Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17407  Op2 = Builder.CreateZExt(Op2, Int64Ty);
17408  Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17409  return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17410  Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17411  }
17412  return Builder.CreateCall(
17413  CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17414  ? Intrinsic::ppc_rldimi
17415  : Intrinsic::ppc_rlwimi),
17416  {Op0, Op1, Op2, Op3});
17417  }
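 // In pseudo-IR, the 32-bit expansion above amounts to:
 //   %rot = call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %shift)
 //   %res = or (and %rot, %mask), (and %b, not %mask)
 // i.e. a funnel-shift rotate followed by a masked merge with the insert
 // target.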
17418  case PPC::BI__builtin_ppc_rlwnm: {
17419  Value *Op0 = EmitScalarExpr(E->getArg(0));
17420  Value *Op1 = EmitScalarExpr(E->getArg(1));
17421  Value *Op2 = EmitScalarExpr(E->getArg(2));
17422  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17423  {Op0, Op1, Op2});
17424  }
17425  case PPC::BI__builtin_ppc_poppar4:
17426  case PPC::BI__builtin_ppc_poppar8: {
17427  Value *Op0 = EmitScalarExpr(E->getArg(0));
17428  llvm::Type *ArgType = Op0->getType();
17429  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17430  Value *Tmp = Builder.CreateCall(F, Op0);
17431 
17432  llvm::Type *ResultType = ConvertType(E->getType());
17433  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17434  if (Result->getType() != ResultType)
17435  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17436  "cast");
17437  return Result;
17438  }
17439  case PPC::BI__builtin_ppc_cmpb: {
17440  Value *Op0 = EmitScalarExpr(E->getArg(0));
17441  Value *Op1 = EmitScalarExpr(E->getArg(1));
17442  if (getTarget().getTriple().isPPC64()) {
17443  Function *F =
17444  CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17445  return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17446  }
17447  // For 32-bit targets, emit the code below:
17448  // %conv = trunc i64 %a to i32
17449  // %conv1 = trunc i64 %b to i32
17450  // %shr = lshr i64 %a, 32
17451  // %conv2 = trunc i64 %shr to i32
17452  // %shr3 = lshr i64 %b, 32
17453  // %conv4 = trunc i64 %shr3 to i32
17454  // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17455  // %conv5 = zext i32 %0 to i64
17456  // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17457  // %conv614 = zext i32 %1 to i64
17458  // %shl = shl nuw i64 %conv614, 32
17459  // %or = or i64 %shl, %conv5
17460  // ret i64 %or
17461  Function *F =
17462  CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17463  Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17464  Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17465  Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17466  Value *ArgOneHi =
17467  Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17468  Value *ArgTwoHi =
17469  Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17470  Value *ResLo = Builder.CreateZExt(
17471  Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17472  Value *ResHiShift = Builder.CreateZExt(
17473  Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17474  Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17475  return Builder.CreateOr(ResLo, ResHi);
17476  }
17477  // Copy sign
17478  case PPC::BI__builtin_vsx_xvcpsgnsp:
17479  case PPC::BI__builtin_vsx_xvcpsgndp: {
17480  llvm::Type *ResultType = ConvertType(E->getType());
17481  Value *X = EmitScalarExpr(E->getArg(0));
17482  Value *Y = EmitScalarExpr(E->getArg(1));
17483  ID = Intrinsic::copysign;
17484  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17485  return Builder.CreateCall(F, {X, Y});
17486  }
17487  // Rounding/truncation
17488  case PPC::BI__builtin_vsx_xvrspip:
17489  case PPC::BI__builtin_vsx_xvrdpip:
17490  case PPC::BI__builtin_vsx_xvrdpim:
17491  case PPC::BI__builtin_vsx_xvrspim:
17492  case PPC::BI__builtin_vsx_xvrdpi:
17493  case PPC::BI__builtin_vsx_xvrspi:
17494  case PPC::BI__builtin_vsx_xvrdpic:
17495  case PPC::BI__builtin_vsx_xvrspic:
17496  case PPC::BI__builtin_vsx_xvrdpiz:
17497  case PPC::BI__builtin_vsx_xvrspiz: {
17498  llvm::Type *ResultType = ConvertType(E->getType());
17499  Value *X = EmitScalarExpr(E->getArg(0));
17500  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17501  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17502  ID = Builder.getIsFPConstrained()
17503  ? Intrinsic::experimental_constrained_floor
17504  : Intrinsic::floor;
17505  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17506  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17507  ID = Builder.getIsFPConstrained()
17508  ? Intrinsic::experimental_constrained_round
17509  : Intrinsic::round;
17510  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17511  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17512  ID = Builder.getIsFPConstrained()
17513  ? Intrinsic::experimental_constrained_rint
17514  : Intrinsic::rint;
17515  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17516  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17517  ID = Builder.getIsFPConstrained()
17518  ? Intrinsic::experimental_constrained_ceil
17519  : Intrinsic::ceil;
17520  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17521  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17522  ID = Builder.getIsFPConstrained()
17523  ? Intrinsic::experimental_constrained_trunc
17524  : Intrinsic::trunc;
17525  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17526  return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17527  : Builder.CreateCall(F, X);
17528  }
17529 
17530  // Absolute value
17531  case PPC::BI__builtin_vsx_xvabsdp:
17532  case PPC::BI__builtin_vsx_xvabssp: {
17533  llvm::Type *ResultType = ConvertType(E->getType());
17534  Value *X = EmitScalarExpr(E->getArg(0));
17535  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17536  return Builder.CreateCall(F, X);
17537  }
17538 
17539  // Fastmath by default
17540  case PPC::BI__builtin_ppc_recipdivf:
17541  case PPC::BI__builtin_ppc_recipdivd:
17542  case PPC::BI__builtin_ppc_rsqrtf:
17543  case PPC::BI__builtin_ppc_rsqrtd: {
17544  FastMathFlags FMF = Builder.getFastMathFlags();
17545  Builder.getFastMathFlags().setFast();
17546  llvm::Type *ResultType = ConvertType(E->getType());
17547  Value *X = EmitScalarExpr(E->getArg(0));
17548 
17549  if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17550  BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17551  Value *Y = EmitScalarExpr(E->getArg(1));
17552  Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17553  Builder.getFastMathFlags() &= (FMF);
17554  return FDiv;
17555  }
17556  auto *One = ConstantFP::get(ResultType, 1.0);
17557  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17558  Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17559  Builder.getFastMathFlags() &= (FMF);
17560  return FDiv;
17561  }
17562  case PPC::BI__builtin_ppc_alignx: {
17563  Value *Op0 = EmitScalarExpr(E->getArg(0));
17564  Value *Op1 = EmitScalarExpr(E->getArg(1));
17565  ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17566  if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17567  AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17568  llvm::Value::MaximumAlignment);
17569 
17570  emitAlignmentAssumption(Op1, E->getArg(1),
17571  /*The expr loc is sufficient.*/ SourceLocation(),
17572  AlignmentCI, nullptr);
17573  return Op1;
17574  }
17575  case PPC::BI__builtin_ppc_rdlam: {
17576  Value *Op0 = EmitScalarExpr(E->getArg(0));
17577  Value *Op1 = EmitScalarExpr(E->getArg(1));
17578  Value *Op2 = EmitScalarExpr(E->getArg(2));
17579  llvm::Type *Ty = Op0->getType();
17580  Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17581  Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17582  Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17583  return Builder.CreateAnd(Rotate, Op2);
17584  }
17585  case PPC::BI__builtin_ppc_load2r: {
17586  Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17587  Value *Op0 = EmitScalarExpr(E->getArg(0));
17588  Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17589  return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17590  }
17591  // FMA variations
17592  case PPC::BI__builtin_ppc_fnmsub:
17593  case PPC::BI__builtin_ppc_fnmsubs:
17594  case PPC::BI__builtin_vsx_xvmaddadp:
17595  case PPC::BI__builtin_vsx_xvmaddasp:
17596  case PPC::BI__builtin_vsx_xvnmaddadp:
17597  case PPC::BI__builtin_vsx_xvnmaddasp:
17598  case PPC::BI__builtin_vsx_xvmsubadp:
17599  case PPC::BI__builtin_vsx_xvmsubasp:
17600  case PPC::BI__builtin_vsx_xvnmsubadp:
17601  case PPC::BI__builtin_vsx_xvnmsubasp: {
17602  llvm::Type *ResultType = ConvertType(E->getType());
17603  Value *X = EmitScalarExpr(E->getArg(0));
17604  Value *Y = EmitScalarExpr(E->getArg(1));
17605  Value *Z = EmitScalarExpr(E->getArg(2));
17606  llvm::Function *F;
17607  if (Builder.getIsFPConstrained())
17608  F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17609  else
17610  F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17611  switch (BuiltinID) {
17612  case PPC::BI__builtin_vsx_xvmaddadp:
17613  case PPC::BI__builtin_vsx_xvmaddasp:
17614  if (Builder.getIsFPConstrained())
17615  return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17616  else
17617  return Builder.CreateCall(F, {X, Y, Z});
17618  case PPC::BI__builtin_vsx_xvnmaddadp:
17619  case PPC::BI__builtin_vsx_xvnmaddasp:
17620  if (Builder.getIsFPConstrained())
17621  return Builder.CreateFNeg(
17622  Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17623  else
17624  return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17625  case PPC::BI__builtin_vsx_xvmsubadp:
17626  case PPC::BI__builtin_vsx_xvmsubasp:
17627  if (Builder.getIsFPConstrained())
17628  return Builder.CreateConstrainedFPCall(
17629  F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17630  else
17631  return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17632  case PPC::BI__builtin_ppc_fnmsub:
17633  case PPC::BI__builtin_ppc_fnmsubs:
17634  case PPC::BI__builtin_vsx_xvnmsubadp:
17635  case PPC::BI__builtin_vsx_xvnmsubasp:
17636  if (Builder.getIsFPConstrained())
17637  return Builder.CreateFNeg(
17638  Builder.CreateConstrainedFPCall(
17639  F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17640  "neg");
17641  else
17642  return Builder.CreateCall(
17643  CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17644  }
17645  llvm_unreachable("Unknown FMA operation");
17646  return nullptr; // Suppress no-return warning
17647  }
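 // For example, __builtin_vsx_xvnmsubadp(x, y, z) computes -(x*y - z): under
 // constrained FP this is emitted as fneg(fma(x, y, fneg(z))), while the
 // default path hands the whole operation to the ppc.fnmsub intrinsic.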
17648 
17649  case PPC::BI__builtin_vsx_insertword: {
17650  Value *Op0 = EmitScalarExpr(E->getArg(0));
17651  Value *Op1 = EmitScalarExpr(E->getArg(1));
17652  Value *Op2 = EmitScalarExpr(E->getArg(2));
17653  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17654 
17655  // The third argument is a compile-time constant int. It must be clamped
17656  // to the range [0, 12].
17657  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17658  assert(ArgCI &&
17659  "Third arg to xxinsertw intrinsic must be constant integer");
17660  const int64_t MaxIndex = 12;
17661  int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17662 
17663  // The builtin semantics don't exactly match the xxinsertw instruction's
17664  // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17665  // word from the first argument and inserts it into the second argument. The
17666  // instruction extracts the word from its second input register and inserts
17667  // it into its first input register, so swap the first and second arguments.
17668  std::swap(Op0, Op1);
17669 
17670  // Need to cast the second argument from a vector of unsigned int to a
17671  // vector of long long.
17672  Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17673 
17674  if (getTarget().isLittleEndian()) {
17675  // Reverse the double words in the vector we will extract from.
17676  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17677  Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17678 
17679  // Reverse the index.
17680  Index = MaxIndex - Index;
17681  }
17682 
17683  // Intrinsic expects the first arg to be a vector of int.
17684  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17685  Op2 = ConstantInt::getSigned(Int32Ty, Index);
17686  return Builder.CreateCall(F, {Op0, Op1, Op2});
17687  }
17688 
17689  case PPC::BI__builtin_vsx_extractuword: {
17690  Value *Op0 = EmitScalarExpr(E->getArg(0));
17691  Value *Op1 = EmitScalarExpr(E->getArg(1));
17692  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17693 
17694  // Intrinsic expects the first argument to be a vector of doublewords.
17695  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17696 
17697  // The second argument is a compile-time constant int that needs to
17698  // be clamped to the range [0, 12].
17699  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17700  assert(ArgCI &&
17701  "Second Arg to xxextractuw intrinsic must be a constant integer!");
17702  const int64_t MaxIndex = 12;
17703  int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17704 
17705  if (getTarget().isLittleEndian()) {
17706  // Reverse the index.
17707  Index = MaxIndex - Index;
17708  Op1 = ConstantInt::getSigned(Int32Ty, Index);
17709 
17710  // Emit the call, then reverse the double words of the results vector.
17711  Value *Call = Builder.CreateCall(F, {Op0, Op1});
17712 
17713  Value *ShuffleCall =
17714  Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17715  return ShuffleCall;
17716  } else {
17717  Op1 = ConstantInt::getSigned(Int32Ty, Index);
17718  return Builder.CreateCall(F, {Op0, Op1});
17719  }
17720  }
17721 
17722  case PPC::BI__builtin_vsx_xxpermdi: {
17723  Value *Op0 = EmitScalarExpr(E->getArg(0));
17724  Value *Op1 = EmitScalarExpr(E->getArg(1));
17725  Value *Op2 = EmitScalarExpr(E->getArg(2));
17726  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17727  assert(ArgCI && "Third arg must be constant integer!");
17728 
17729  unsigned Index = ArgCI->getZExtValue();
17730  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17731  Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17732 
17733  // Account for endianness by treating this as just a shuffle. So we use the
17734  // same indices for both LE and BE in order to produce expected results in
17735  // both cases.
17736  int ElemIdx0 = (Index & 2) >> 1;
17737  int ElemIdx1 = 2 + (Index & 1);
17738 
17739  int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17740  Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17741  QualType BIRetType = E->getType();
17742  auto RetTy = ConvertType(BIRetType);
17743  return Builder.CreateBitCast(ShuffleCall, RetTy);
17744  }
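 // For example, __builtin_vsx_xxpermdi(a, b, 1) gives ElemIdx0 = 0 and
 // ElemIdx1 = 3, i.e. a shufflevector mask of <0, 3> over the two v2i64
 // operands: doubleword 0 of a followed by doubleword 1 of b.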
17745 
17746  case PPC::BI__builtin_vsx_xxsldwi: {
17747  Value *Op0 = EmitScalarExpr(E->getArg(0));
17748  Value *Op1 = EmitScalarExpr(E->getArg(1));
17749  Value *Op2 = EmitScalarExpr(E->getArg(2));
17750  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17751  assert(ArgCI && "Third argument must be a compile time constant");
17752  unsigned Index = ArgCI->getZExtValue() & 0x3;
17753  Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17754  Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17755 
17756  // Create a shuffle mask
17757  int ElemIdx0;
17758  int ElemIdx1;
17759  int ElemIdx2;
17760  int ElemIdx3;
17761  if (getTarget().isLittleEndian()) {
17762  // Little endian element N comes from element 8+N-Index of the
17763  // concatenated wide vector (of course, using modulo arithmetic on
17764  // the total number of elements).
17765  ElemIdx0 = (8 - Index) % 8;
17766  ElemIdx1 = (9 - Index) % 8;
17767  ElemIdx2 = (10 - Index) % 8;
17768  ElemIdx3 = (11 - Index) % 8;
17769  } else {
17770  // Big endian ElemIdx<N> = Index + N
17771  ElemIdx0 = Index;
17772  ElemIdx1 = Index + 1;
17773  ElemIdx2 = Index + 2;
17774  ElemIdx3 = Index + 3;
17775  }
17776 
17777  int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17778  Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17779  QualType BIRetType = E->getType();
17780  auto RetTy = ConvertType(BIRetType);
17781  return Builder.CreateBitCast(ShuffleCall, RetTy);
17782  }
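 // For example, with Index == 1 the shuffle mask is {1, 2, 3, 4} on a
 // big-endian target and {7, 0, 1, 2} on a little-endian one, the latter
 // compensating for the reversed element numbering.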
17783 
17784  case PPC::BI__builtin_pack_vector_int128: {
17785  Value *Op0 = EmitScalarExpr(E->getArg(0));
17786  Value *Op1 = EmitScalarExpr(E->getArg(1));
17787  bool isLittleEndian = getTarget().isLittleEndian();
17788  Value *PoisonValue =
17789  llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17790  Value *Res = Builder.CreateInsertElement(
17791  PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17792  Res = Builder.CreateInsertElement(Res, Op1,
17793  (uint64_t)(isLittleEndian ? 0 : 1));
17794  return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17795  }
17796 
17797  case PPC::BI__builtin_unpack_vector_int128: {
17798  Value *Op0 = EmitScalarExpr(E->getArg(0));
17799  Value *Op1 = EmitScalarExpr(E->getArg(1));
17800  ConstantInt *Index = cast<ConstantInt>(Op1);
17801  Value *Unpacked = Builder.CreateBitCast(
17802  Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17803 
17804  if (getTarget().isLittleEndian())
17805  Index =
17806  ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17807 
17808  return Builder.CreateExtractElement(Unpacked, Index);
17809  }
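 // The endian adjustments in pack and unpack are symmetric, so
 //   __builtin_unpack_vector_int128(__builtin_pack_vector_int128(a, b), 0)
 // yields a on both big- and little-endian targets.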
17810 
17811  case PPC::BI__builtin_ppc_sthcx: {
17812  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17813  Value *Op0 = EmitScalarExpr(E->getArg(0));
17814  Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17815  return Builder.CreateCall(F, {Op0, Op1});
17816  }
17817 
17818  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17819  // Some of the MMA instructions accumulate their result into an existing
17820  // accumulator whereas the others generate a new accumulator. So we need
17821  // custom code generation to expand a builtin call taking a pointer into a
17822  // load (if the corresponding instruction accumulates its result), followed
17823  // by the call to the intrinsic and a store of the result.
17824 #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17825  case PPC::BI__builtin_##Name:
17826 #include "clang/Basic/BuiltinsPPC.def"
17827  {
17828  SmallVector<Value *, 4> Ops;
17829  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17830  if (E->getArg(i)->getType()->isArrayType())
17831  Ops.push_back(
17832  EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
17833  else
17834  Ops.push_back(EmitScalarExpr(E->getArg(i)));
17835  // The first argument of these builtins is a pointer used to store their
17836  // result. However, the llvm intrinsics return their result in multiple
17837  // return values. So, here we emit code extracting these values from the
17838  // intrinsic results and storing them using that pointer.
17839  if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17840  BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17841  BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17842  unsigned NumVecs = 2;
17843  auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17844  if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17845  NumVecs = 4;
17846  Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17847  }
17848  llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17849  Address Addr = EmitPointerWithAlignment(E->getArg(1));
17850  Value *Vec = Builder.CreateLoad(Addr);
17851  Value *Call = Builder.CreateCall(F, {Vec});
17852  llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17853  Value *Ptr = Ops[0];
17854  for (unsigned i=0; i<NumVecs; i++) {
17855  Value *Vec = Builder.CreateExtractValue(Call, i);
17856  llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17857  Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17858  Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17859  }
17860  return Call;
17861  }
17862  if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17863  BuiltinID == PPC::BI__builtin_mma_build_acc) {
17864  // Reverse the order of the operands for LE, so the
17865  // same builtin call can be used on both LE and BE
17866  // without the need for the programmer to swap operands.
17867  // The operands are reversed starting from the second argument;
17868  // the first operand is the pointer to the pair/accumulator
17869  // that is being built.
17870  if (getTarget().isLittleEndian())
17871  std::reverse(Ops.begin() + 1, Ops.end());
17872  }
17873  bool Accumulate;
17874  switch (BuiltinID) {
17875  #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17876  case PPC::BI__builtin_##Name: \
17877  ID = Intrinsic::ppc_##Intr; \
17878  Accumulate = Acc; \
17879  break;
17880  #include "clang/Basic/BuiltinsPPC.def"
17881  }
17882  if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17883  BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17884  BuiltinID == PPC::BI__builtin_mma_lxvp ||
17885  BuiltinID == PPC::BI__builtin_mma_stxvp) {
17886  if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17887  BuiltinID == PPC::BI__builtin_mma_lxvp) {
17888  Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17889  } else {
17890  Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17891  }
17892  Ops.pop_back();
17893  llvm::Function *F = CGM.getIntrinsic(ID);
17894  return Builder.CreateCall(F, Ops, "");
17895  }
17896  SmallVector<Value*, 4> CallOps;
17897  if (Accumulate) {
17898  Address Addr = EmitPointerWithAlignment(E->getArg(0));
17899  Value *Acc = Builder.CreateLoad(Addr);
17900  CallOps.push_back(Acc);
17901  }
17902  for (unsigned i=1; i<Ops.size(); i++)
17903  CallOps.push_back(Ops[i]);
17904  llvm::Function *F = CGM.getIntrinsic(ID);
17905  Value *Call = Builder.CreateCall(F, CallOps);
17906  return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17907  }
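 // For an accumulating builtin such as __builtin_mma_xvf32gerpp(&acc, a, b),
 // the expansion above loads the accumulator through the first (pointer)
 // argument, passes it to the intrinsic along with the remaining operands,
 // and stores the returned accumulator back through the same pointer.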
17908 
17909  case PPC::BI__builtin_ppc_compare_and_swap:
17910  case PPC::BI__builtin_ppc_compare_and_swaplp: {
17911  Address Addr = EmitPointerWithAlignment(E->getArg(0));
17912  Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17913  Value *OldVal = Builder.CreateLoad(OldValAddr);
17914  QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17915  LValue LV = MakeAddrLValue(Addr, AtomicTy);
17916  Value *Op2 = EmitScalarExpr(E->getArg(2));
17917  auto Pair = EmitAtomicCompareExchange(
17918  LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17919  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17920  // Unlike C11's atomic_compare_exchange, according to
17921  // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17922  // > In either case, the contents of the memory location specified by addr
17923  // > are copied into the memory location specified by old_val_addr.
17924  // But it does not specify whether the store to OldValAddr is atomic, nor
17925  // which memory order to use. Following XL's codegen, treat it as a normal
17926  // store.
17927  Value *LoadedVal = Pair.first.getScalarVal();
17928  Builder.CreateStore(LoadedVal, OldValAddr);
17929  return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17930  }
17931  case PPC::BI__builtin_ppc_fetch_and_add:
17932  case PPC::BI__builtin_ppc_fetch_and_addlp: {
17933  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17934  llvm::AtomicOrdering::Monotonic);
17935  }
17936  case PPC::BI__builtin_ppc_fetch_and_and:
17937  case PPC::BI__builtin_ppc_fetch_and_andlp: {
17938  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17939  llvm::AtomicOrdering::Monotonic);
17940  }
17941 
17942  case PPC::BI__builtin_ppc_fetch_and_or:
17943  case PPC::BI__builtin_ppc_fetch_and_orlp: {
17944  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17945  llvm::AtomicOrdering::Monotonic);
17946  }
17947  case PPC::BI__builtin_ppc_fetch_and_swap:
17948  case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17949  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17950  llvm::AtomicOrdering::Monotonic);
17951  }
17952  case PPC::BI__builtin_ppc_ldarx:
17953  case PPC::BI__builtin_ppc_lwarx:
17954  case PPC::BI__builtin_ppc_lharx:
17955  case PPC::BI__builtin_ppc_lbarx:
17956  return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17957  case PPC::BI__builtin_ppc_mfspr: {
17958  Value *Op0 = EmitScalarExpr(E->getArg(0));
17959  llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17960  ? Int32Ty
17961  : Int64Ty;
17962  Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17963  return Builder.CreateCall(F, {Op0});
17964  }
17965  case PPC::BI__builtin_ppc_mtspr: {
17966  Value *Op0 = EmitScalarExpr(E->getArg(0));
17967  Value *Op1 = EmitScalarExpr(E->getArg(1));
17968  llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17969  ? Int32Ty
17970  : Int64Ty;
17971  Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17972  return Builder.CreateCall(F, {Op0, Op1});
17973  }
17974  case PPC::BI__builtin_ppc_popcntb: {
17975  Value *ArgValue = EmitScalarExpr(E->getArg(0));
17976  llvm::Type *ArgType = ArgValue->getType();
17977  Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17978  return Builder.CreateCall(F, {ArgValue}, "popcntb");
17979  }
17980  case PPC::BI__builtin_ppc_mtfsf: {
17981  // The builtin takes a uint32 that needs to be converted to an
17982  // f64 to be passed to the intrinsic.
17983  Value *Op0 = EmitScalarExpr(E->getArg(0));
17984  Value *Op1 = EmitScalarExpr(E->getArg(1));
17985  Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17986  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17987  return Builder.CreateCall(F, {Op0, Cast}, "");
17988  }
17989 
17990  case PPC::BI__builtin_ppc_swdiv_nochk:
17991  case PPC::BI__builtin_ppc_swdivs_nochk: {
17992  Value *Op0 = EmitScalarExpr(E->getArg(0));
17993  Value *Op1 = EmitScalarExpr(E->getArg(1));
17994  FastMathFlags FMF = Builder.getFastMathFlags();
17995  Builder.getFastMathFlags().setFast();
17996  Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17997  Builder.getFastMathFlags() &= (FMF);
17998  return FDiv;
17999  }
18000  case PPC::BI__builtin_ppc_fric:
18001  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18002  *this, E, Intrinsic::rint,
18003  Intrinsic::experimental_constrained_rint))
18004  .getScalarVal();
18005  case PPC::BI__builtin_ppc_frim:
18006  case PPC::BI__builtin_ppc_frims:
18007  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18008  *this, E, Intrinsic::floor,
18009  Intrinsic::experimental_constrained_floor))
18010  .getScalarVal();
18011  case PPC::BI__builtin_ppc_frin:
18012  case PPC::BI__builtin_ppc_frins:
18013  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18014  *this, E, Intrinsic::round,
18015  Intrinsic::experimental_constrained_round))
18016  .getScalarVal();
18017  case PPC::BI__builtin_ppc_frip:
18018  case PPC::BI__builtin_ppc_frips:
18019  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18020  *this, E, Intrinsic::ceil,
18021  Intrinsic::experimental_constrained_ceil))
18022  .getScalarVal();
18023  case PPC::BI__builtin_ppc_friz:
18024  case PPC::BI__builtin_ppc_frizs:
18025  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18026  *this, E, Intrinsic::trunc,
18027  Intrinsic::experimental_constrained_trunc))
18028  .getScalarVal();
18029  case PPC::BI__builtin_ppc_fsqrt:
18030  case PPC::BI__builtin_ppc_fsqrts:
18031  return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18032  *this, E, Intrinsic::sqrt,
18033  Intrinsic::experimental_constrained_sqrt))
18034  .getScalarVal();
18035  case PPC::BI__builtin_ppc_test_data_class: {
18036  Value *Op0 = EmitScalarExpr(E->getArg(0));
18037  Value *Op1 = EmitScalarExpr(E->getArg(1));
18038  return Builder.CreateCall(
18039  CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18040  {Op0, Op1}, "test_data_class");
18041  }
18042  case PPC::BI__builtin_ppc_maxfe: {
18043  Value *Op0 = EmitScalarExpr(E->getArg(0));
18044  Value *Op1 = EmitScalarExpr(E->getArg(1));
18045  Value *Op2 = EmitScalarExpr(E->getArg(2));
18046  Value *Op3 = EmitScalarExpr(E->getArg(3));
18047  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18048  {Op0, Op1, Op2, Op3});
18049  }
18050  case PPC::BI__builtin_ppc_maxfl: {
18051  Value *Op0 = EmitScalarExpr(E->getArg(0));
18052  Value *Op1 = EmitScalarExpr(E->getArg(1));
18053  Value *Op2 = EmitScalarExpr(E->getArg(2));
18054  Value *Op3 = EmitScalarExpr(E->getArg(3));
18055  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18056  {Op0, Op1, Op2, Op3});
18057  }
18058  case PPC::BI__builtin_ppc_maxfs: {
18059  Value *Op0 = EmitScalarExpr(E->getArg(0));
18060  Value *Op1 = EmitScalarExpr(E->getArg(1));
18061  Value *Op2 = EmitScalarExpr(E->getArg(2));
18062  Value *Op3 = EmitScalarExpr(E->getArg(3));
18063  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18064  {Op0, Op1, Op2, Op3});
18065  }
18066  case PPC::BI__builtin_ppc_minfe: {
18067  Value *Op0 = EmitScalarExpr(E->getArg(0));
18068  Value *Op1 = EmitScalarExpr(E->getArg(1));
18069  Value *Op2 = EmitScalarExpr(E->getArg(2));
18070  Value *Op3 = EmitScalarExpr(E->getArg(3));
18071  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18072  {Op0, Op1, Op2, Op3});
18073  }
18074  case PPC::BI__builtin_ppc_minfl: {
18075  Value *Op0 = EmitScalarExpr(E->getArg(0));
18076  Value *Op1 = EmitScalarExpr(E->getArg(1));
18077  Value *Op2 = EmitScalarExpr(E->getArg(2));
18078  Value *Op3 = EmitScalarExpr(E->getArg(3));
18079  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18080  {Op0, Op1, Op2, Op3});
18081  }
18082  case PPC::BI__builtin_ppc_minfs: {
18083  Value *Op0 = EmitScalarExpr(E->getArg(0));
18084  Value *Op1 = EmitScalarExpr(E->getArg(1));
18085  Value *Op2 = EmitScalarExpr(E->getArg(2));
18086  Value *Op3 = EmitScalarExpr(E->getArg(3));
18087  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18088  {Op0, Op1, Op2, Op3});
18089  }
18090  case PPC::BI__builtin_ppc_swdiv:
18091  case PPC::BI__builtin_ppc_swdivs: {
18092  Value *Op0 = EmitScalarExpr(E->getArg(0));
18093  Value *Op1 = EmitScalarExpr(E->getArg(1));
18094  return Builder.CreateFDiv(Op0, Op1, "swdiv");
18095  }
18096  case PPC::BI__builtin_ppc_set_fpscr_rn:
18097  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18098  {EmitScalarExpr(E->getArg(0))});
18099  case PPC::BI__builtin_ppc_mffs:
18100  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18101  }
18102 }
18103 
18104 namespace {
18105 // If \p E is not a null pointer, insert an address space cast to match the
18106 // return type of \p E if necessary.
18107 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18108  const CallExpr *E = nullptr) {
18109  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18110  auto *Call = CGF.Builder.CreateCall(F);
18111  Call->addRetAttr(
18112  Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18113  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18114  if (!E)
18115  return Call;
18116  QualType BuiltinRetType = E->getType();
18117  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18118  if (RetTy == Call->getType())
18119  return Call;
18120  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18121 }
18122 
18123 Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18124  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18125  auto *Call = CGF.Builder.CreateCall(F);
18126  Call->addRetAttr(
18127  Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18128  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18129  return Call;
18130 }
18131 
18132 // \p Index is 0, 1, or 2 for the x, y, and z dimensions, respectively.
18133 /// Emit code based on Code Object ABI version.
18134 /// COV_4 : Emit code to use dispatch ptr
18135 /// COV_5+ : Emit code to use implicitarg ptr
18136 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18137 /// and use its value for COV_4 or COV_5+ approach. It is used for
18138 /// compiling device libraries in an ABI-agnostic way.
18139 ///
18140 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18141 /// clang during compilation of user code.
18142 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18143  llvm::LoadInst *LD;
18144 
18145  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18146 
18147  if (Cov == CodeObjectVersionKind::COV_None) {
18148  StringRef Name = "__oclc_ABI_version";
18149  auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18150  if (!ABIVersionC)
18151  ABIVersionC = new llvm::GlobalVariable(
18152  CGF.CGM.getModule(), CGF.Int32Ty, false,
18153  llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18154  llvm::GlobalVariable::NotThreadLocal,
18155  CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18156 
18157  // This load will be eliminated by IPSCCP because it is constant
18158  // weak_odr without externally_initialized. Either changing it to weak or
18159  // adding externally_initialized will keep the load.
18160  Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18161  CGF.CGM.getIntAlign());
18162 
18163  Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18164  ABIVersion,
18165  llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18166 
18167  // Indexing the implicit kernarg segment.
18168  Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18169  CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18170 
18171  // Indexing the HSA kernel_dispatch_packet struct.
18172  Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18173  CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18174 
18175  auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18176  LD = CGF.Builder.CreateLoad(
18177  Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18178  } else {
18179  Value *GEP = nullptr;
18180  if (Cov >= CodeObjectVersionKind::COV_5) {
18181  // Indexing the implicit kernarg segment.
18182  GEP = CGF.Builder.CreateConstGEP1_32(
18183  CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18184  } else {
18185  // Indexing the HSA kernel_dispatch_packet struct.
18186  GEP = CGF.Builder.CreateConstGEP1_32(
18187  CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18188  }
18189  LD = CGF.Builder.CreateLoad(
18190  Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18191  }
18192 
18193  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18194  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18195  APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18196  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18197  LD->setMetadata(llvm::LLVMContext::MD_noundef,
18198  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18199  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18200  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18201  return LD;
18202 }
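 // For example, querying the y dimension (Index == 1) under code object v5
 // loads a 16-bit value at byte offset 12 + 1 * 2 == 14 of the implicit
 // kernarg segment, while under v4 it reads offset 4 + 1 * 2 == 6 of the HSA
 // kernel_dispatch_packet.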
18203 
18204 // \p Index is 0, 1, or 2 for the x, y, and z dimensions, respectively.
18205 Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18206  const unsigned XOffset = 12;
18207  auto *DP = EmitAMDGPUDispatchPtr(CGF);
18208  // Indexing the HSA kernel_dispatch_packet struct.
18209  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18210  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18211  auto *LD = CGF.Builder.CreateLoad(
18212  Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18213  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18214  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18215  return LD;
18216 }
18217 } // namespace
18218 
18219 // For processing memory ordering and memory scope arguments of various
18220 // amdgcn builtins.
18221 // \p Order takes a C++11-compatible memory-ordering specifier and converts
18222 // it into LLVM's memory ordering specifier using the atomic C ABI, and
18223 // writes it to \p AO. \p Scope takes a const char * and converts it into an
18224 // AMDGCN-specific SyncScopeID, which is written to \p SSID.
18225 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18226  llvm::AtomicOrdering &AO,
18227  llvm::SyncScope::ID &SSID) {
18228  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18229 
18230  // Map C11/C++11 memory ordering to LLVM memory ordering
18231  assert(llvm::isValidAtomicOrderingCABI(ord));
18232  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18233  case llvm::AtomicOrderingCABI::acquire:
18234  case llvm::AtomicOrderingCABI::consume:
18235  AO = llvm::AtomicOrdering::Acquire;
18236  break;
18237  case llvm::AtomicOrderingCABI::release:
18238  AO = llvm::AtomicOrdering::Release;
18239  break;
18240  case llvm::AtomicOrderingCABI::acq_rel:
18241  AO = llvm::AtomicOrdering::AcquireRelease;
18242  break;
18243  case llvm::AtomicOrderingCABI::seq_cst:
18244  AO = llvm::AtomicOrdering::SequentiallyConsistent;
18245  break;
18246  case llvm::AtomicOrderingCABI::relaxed:
18247  AO = llvm::AtomicOrdering::Monotonic;
18248  break;
18249  }
18250 
18251  StringRef scp;
18252  llvm::getConstantStringInfo(Scope, scp);
18253  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18254 }
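 // For example, a call such as __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,
 // "workgroup") is mapped by this helper to SequentiallyConsistent ordering
 // and the "workgroup" synchronization scope.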
18255 
18256 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18257  unsigned Idx,
18258  const CallExpr *E) {
18259  llvm::Value *Arg = nullptr;
18260  if ((ICEArguments & (1 << Idx)) == 0) {
18261  Arg = EmitScalarExpr(E->getArg(Idx));
18262  } else {
18263  // If this is required to be a constant, constant fold it so that we
18264  // know that the generated intrinsic gets a ConstantInt.
18265  std::optional<llvm::APSInt> Result =
18266  E->getArg(Idx)->getIntegerConstantExpr(getContext());
18267  assert(Result && "Expected argument to be a constant");
18268  Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18269  }
18270  return Arg;
18271 }
18272 
18273 Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18274  if (QT->hasFloatingRepresentation()) {
18275  switch (elementCount) {
18276  case 2:
18277  return Intrinsic::dx_dot2;
18278  case 3:
18279  return Intrinsic::dx_dot3;
18280  case 4:
18281  return Intrinsic::dx_dot4;
18282  }
18283  }
18284  if (QT->hasSignedIntegerRepresentation())
18285  return Intrinsic::dx_sdot;
18286 
18287  assert(QT->hasUnsignedIntegerRepresentation());
18288  return Intrinsic::dx_udot;
18289 }
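 // For example, a float3 argument selects dx_dot3, while signed and unsigned
 // integer vectors select dx_sdot and dx_udot, respectively, regardless of
 // element count.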
18290 
18291 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18292  const CallExpr *E) {
18293  if (!getLangOpts().HLSL)
18294  return nullptr;
18295 
18296  switch (BuiltinID) {
18297  case Builtin::BI__builtin_hlsl_elementwise_all: {
18298  Value *Op0 = EmitScalarExpr(E->getArg(0));
18299  return Builder.CreateIntrinsic(
18300  /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18301  CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18302  "hlsl.all");
18303  }
18304  case Builtin::BI__builtin_hlsl_elementwise_any: {
18305  Value *Op0 = EmitScalarExpr(E->getArg(0));
18306  return Builder.CreateIntrinsic(
18307  /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18308  CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18309  "hlsl.any");
18310  }
18311  case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18312  Value *OpX = EmitScalarExpr(E->getArg(0));
18313  Value *OpMin = EmitScalarExpr(E->getArg(1));
18314  Value *OpMax = EmitScalarExpr(E->getArg(2));
18315 
18316  QualType Ty = E->getArg(0)->getType();
18317  bool IsUnsigned = false;
18318  if (auto *VecTy = Ty->getAs<VectorType>())
18319  Ty = VecTy->getElementType();
18320  IsUnsigned = Ty->isUnsignedIntegerType();
18321  return Builder.CreateIntrinsic(
18322  /*ReturnType=*/OpX->getType(),
18323  IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18324  ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18325  }
18326  case Builtin::BI__builtin_hlsl_dot: {
18327  Value *Op0 = EmitScalarExpr(E->getArg(0));
18328  Value *Op1 = EmitScalarExpr(E->getArg(1));
18329  llvm::Type *T0 = Op0->getType();
18330  llvm::Type *T1 = Op1->getType();
18331  if (!T0->isVectorTy() && !T1->isVectorTy()) {
18332  if (T0->isFloatingPointTy())
18333  return Builder.CreateFMul(Op0, Op1, "dx.dot");
18334 
18335  if (T0->isIntegerTy())
18336  return Builder.CreateMul(Op0, Op1, "dx.dot");
18337 
18338  // Bools should have been promoted
18339  llvm_unreachable(
18340  "Scalar dot product is only supported on ints and floats.");
18341  }
18342  // A VectorSplat should have happened
18343  assert(T0->isVectorTy() && T1->isVectorTy() &&
18344  "Dot product of vector and scalar is not supported.");
18345 
18346  // A vector sext or sitofp should have happened
18347  assert(T0->getScalarType() == T1->getScalarType() &&
18348  "Dot product of vectors needs the same element types.");
18349 
18350  auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18351  [[maybe_unused]] auto *VecTy1 =
18352  E->getArg(1)->getType()->getAs<VectorType>();
18353  // An HLSLVectorTruncation should have happened
18354  assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18355  "Dot product requires vectors to be of the same size.");
18356 
18357  return Builder.CreateIntrinsic(
18358  /*ReturnType=*/T0->getScalarType(),
18359  getDotProductIntrinsic(E->getArg(0)->getType(),
18360  VecTy0->getNumElements()),
18361  ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18362  } break;
18363  case Builtin::BI__builtin_hlsl_lerp: {
18364  Value *X = EmitScalarExpr(E->getArg(0));
18365  Value *Y = EmitScalarExpr(E->getArg(1));
18366  Value *S = EmitScalarExpr(E->getArg(2));
18367  if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18368  llvm_unreachable("lerp operand must have a float representation");
18369  return Builder.CreateIntrinsic(
18370  /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18371  ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18372  }
18373  case Builtin::BI__builtin_hlsl_elementwise_frac: {
18374  Value *Op0 = EmitScalarExpr(E->getArg(0));
18375  if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18376  llvm_unreachable("frac operand must have a float representation");
18377  return Builder.CreateIntrinsic(
18378  /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18379  ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18380  }
18381  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18382  Value *Op0 = EmitScalarExpr(E->getArg(0));
18383  llvm::Type *Xty = Op0->getType();
18384  llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18385  if (Xty->isVectorTy()) {
18386  auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18387  retType = llvm::VectorType::get(
18388  retType, ElementCount::getFixed(XVecTy->getNumElements()));
18389  }
18390  if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18391  llvm_unreachable("isinf operand must have a float representation");
18392  return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18393  ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18394  }
18395  case Builtin::BI__builtin_hlsl_mad: {
18396  Value *M = EmitScalarExpr(E->getArg(0));
18397  Value *A = EmitScalarExpr(E->getArg(1));
18398  Value *B = EmitScalarExpr(E->getArg(2));
18399  if (E->getArg(0)->getType()->hasFloatingRepresentation())
18400  return Builder.CreateIntrinsic(
18401  /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18402  ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18403 
18404  if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18405  if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18406  return Builder.CreateIntrinsic(
18407  /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18408  ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18409 
18410  Value *Mul = Builder.CreateNSWMul(M, A);
18411  return Builder.CreateNSWAdd(Mul, B);
18412  }
18413  assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18414  if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18415  return Builder.CreateIntrinsic(
18416  /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18417  ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18418 
18419  Value *Mul = Builder.CreateNUWMul(M, A);
18420  return Builder.CreateNUWAdd(Mul, B);
18421  }
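 // For example, mad(m, a, b) on floats becomes a single llvm.fmuladd call;
 // on signed ints it becomes dx.imad when targeting DXIL and an NSW
 // mul-then-add otherwise, with dx.umad / NUW mul-then-add for the unsigned
 // case.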
18422  case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18423  Value *Op0 = EmitScalarExpr(E->getArg(0));
18424  if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18425  llvm_unreachable("rcp operand must have a float representation");
18426  llvm::Type *Ty = Op0->getType();
18427  llvm::Type *EltTy = Ty->getScalarType();
18428  Constant *One =
18429  Ty->isVectorTy()
18430  ? ConstantVector::getSplat(
18431  ElementCount::getFixed(
18432  dyn_cast<FixedVectorType>(Ty)->getNumElements()),
18433  ConstantFP::get(EltTy, 1.0))
18434  : ConstantFP::get(EltTy, 1.0);
18435  return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18436  }
18437  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18438  Value *Op0 = EmitScalarExpr(E->getArg(0));
18439  if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18440  llvm_unreachable("rsqrt operand must have a float representation");
18441  return Builder.CreateIntrinsic(
18442  /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18443  ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18444  }
18445  case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18446  auto *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
18447  llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18448  {}, false, true));
18449  if (getTarget().getTriple().isSPIRVLogical())
18450  CI = dyn_cast<CallInst>(addControlledConvergenceToken(CI));
18451  return CI;
18452  }
18453  }
18454  return nullptr;
18455 }
18456 
18457 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18458  const CallExpr *E) {
18459  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18460  llvm::SyncScope::ID SSID;
18461  switch (BuiltinID) {
18462  case AMDGPU::BI__builtin_amdgcn_div_scale:
18463  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18464  // Translate from the intrinsic's struct return to the builtin's out
18465  // argument.
18466 
18467  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18468 
18469  llvm::Value *X = EmitScalarExpr(E->getArg(0));
18470  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18471  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18472 
18473  llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18474  X->getType());
18475 
18476  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18477 
18478  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18479  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18480 
18481  llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18482 
18483  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18484  Builder.CreateStore(FlagExt, FlagOutPtr);
18485  return Result;
18486  }
18487  case AMDGPU::BI__builtin_amdgcn_div_fmas:
18488  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18489  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18490  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18491  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18492  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18493 
18494  llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18495  Src0->getType());
18496  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18497  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18498  }
18499 
18500  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18501  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
18502  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18503  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
18504  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18505  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18506  llvm::SmallVector<llvm::Value *, 6> Args;
18507  // Find out if any arguments are required to be integer constant
18508  // expressions.
18509  unsigned ICEArguments = 0;
18510  ASTContext::GetBuiltinTypeError Error;
18511  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18512  assert(Error == ASTContext::GE_None && "Should not codegen an error");
18513  for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18514  Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18515  }
18516  assert(Args.size() == 5 || Args.size() == 6);
18517  if (Args.size() == 5)
18518  Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18519  Function *F =
18520  CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18521  return Builder.CreateCall(F, Args);
18522  }
18523  case AMDGPU::BI__builtin_amdgcn_div_fixup:
18524  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18525  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18526  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
18527  case AMDGPU::BI__builtin_amdgcn_trig_preop:
18528  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18529  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18530  case AMDGPU::BI__builtin_amdgcn_rcp:
18531  case AMDGPU::BI__builtin_amdgcn_rcpf:
18532  case AMDGPU::BI__builtin_amdgcn_rcph:
18533  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
18534  case AMDGPU::BI__builtin_amdgcn_sqrt:
18535  case AMDGPU::BI__builtin_amdgcn_sqrtf:
18536  case AMDGPU::BI__builtin_amdgcn_sqrth:
18537  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
18538  case AMDGPU::BI__builtin_amdgcn_rsq:
18539  case AMDGPU::BI__builtin_amdgcn_rsqf:
18540  case AMDGPU::BI__builtin_amdgcn_rsqh:
18541  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
18542  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18543  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18544  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
18545  case AMDGPU::BI__builtin_amdgcn_sinf:
18546  case AMDGPU::BI__builtin_amdgcn_sinh:
18547  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
18548  case AMDGPU::BI__builtin_amdgcn_cosf:
18549  case AMDGPU::BI__builtin_amdgcn_cosh:
18550  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
18551  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18552  return EmitAMDGPUDispatchPtr(*this, E);
18553  case AMDGPU::BI__builtin_amdgcn_logf:
18554  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
18555  case AMDGPU::BI__builtin_amdgcn_exp2f:
18556  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
18557  case AMDGPU::BI__builtin_amdgcn_log_clampf:
18558  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
18559  case AMDGPU::BI__builtin_amdgcn_ldexp:
18560  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18561  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18562  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18563  llvm::Function *F =
18564  CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18565  return Builder.CreateCall(F, {Src0, Src1});
18566  }
18567  case AMDGPU::BI__builtin_amdgcn_ldexph: {
18568  // The raw instruction has a different behavior for out of bounds exponent
18569  // values (implicit truncation instead of saturating to short_min/short_max).
18570  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18571  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18572  llvm::Function *F =
18573  CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18574  return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18575  }
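// Resulting IR for the half variant, as a sketch: the exponent is narrowed
// to i16 so the generic intrinsic models the hardware's 16-bit exponent
// operand.
//
//   _Float16 h(_Float16 x, int e) { return __builtin_amdgcn_ldexph(x, e); }
//   // => %t = trunc i32 %e to i16
//   //    call half @llvm.ldexp.f16.i16(half %x, i16 %t)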
18576  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18577  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18578  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18579  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
18580  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18581  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18582  Value *Src0 = EmitScalarExpr(E->getArg(0));
18583  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18584  { Builder.getInt32Ty(), Src0->getType() });
18585  return Builder.CreateCall(F, Src0);
18586  }
18587  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18588  Value *Src0 = EmitScalarExpr(E->getArg(0));
18589  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18590  { Builder.getInt16Ty(), Src0->getType() });
18591  return Builder.CreateCall(F, Src0);
18592  }
18593  case AMDGPU::BI__builtin_amdgcn_fract:
18594  case AMDGPU::BI__builtin_amdgcn_fractf:
18595  case AMDGPU::BI__builtin_amdgcn_fracth:
18596  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18597  case AMDGPU::BI__builtin_amdgcn_lerp:
18598  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18599  case AMDGPU::BI__builtin_amdgcn_ubfe:
18600  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18601  case AMDGPU::BI__builtin_amdgcn_sbfe:
18602  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18603  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18604  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18605  llvm::Type *ResultType = ConvertType(E->getType());
18606  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18607  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18608  return Builder.CreateCall(F, { Src });
18609  }
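// Ballot usage sketch: the intrinsic is overloaded only on its result type,
// which the builtin's return type (32- or 64-bit mask) selects.
//
//   unsigned long long m(_Bool p) { return __builtin_amdgcn_ballot_w64(p); }
//   // => call i64 @llvm.amdgcn.ballot.i64(i1 %p)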
18610  case AMDGPU::BI__builtin_amdgcn_uicmp:
18611  case AMDGPU::BI__builtin_amdgcn_uicmpl:
18612  case AMDGPU::BI__builtin_amdgcn_sicmp:
18613  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18614  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18615  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18616  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18617 
18618  // FIXME-GFX10: How should 32 bit mask be handled?
18619  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18620  { Builder.getInt64Ty(), Src0->getType() });
18621  return Builder.CreateCall(F, { Src0, Src1, Src2 });
18622  }
18623  case AMDGPU::BI__builtin_amdgcn_fcmp:
18624  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18625  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18626  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18627  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18628 
18629  // FIXME-GFX10: How should 32 bit mask be handled?
18630  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18631  { Builder.getInt64Ty(), Src0->getType() });
18632  return Builder.CreateCall(F, { Src0, Src1, Src2 });
18633  }
18634  case AMDGPU::BI__builtin_amdgcn_class:
18635  case AMDGPU::BI__builtin_amdgcn_classf:
18636  case AMDGPU::BI__builtin_amdgcn_classh:
18637  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18638  case AMDGPU::BI__builtin_amdgcn_fmed3f:
18639  case AMDGPU::BI__builtin_amdgcn_fmed3h:
18640  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18641  case AMDGPU::BI__builtin_amdgcn_ds_append:
18642  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18643  Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18644  Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18645  Value *Src0 = EmitScalarExpr(E->getArg(0));
18646  Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18647  return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18648  }
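// Only the pointer operand is exposed at the builtin level; the trailing i1
// flag in the intrinsic signature is pinned to false here, so roughly:
//
//   call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %p, i1 false)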
18649  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18650  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18651  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18652  Intrinsic::ID Intrin;
18653  switch (BuiltinID) {
18654  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18655  Intrin = Intrinsic::amdgcn_ds_fadd;
18656  break;
18657  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18658  Intrin = Intrinsic::amdgcn_ds_fmin;
18659  break;
18660  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18661  Intrin = Intrinsic::amdgcn_ds_fmax;
18662  break;
18663  }
18664  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18665  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18666  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18667  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18668  llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
18669  llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
18670  llvm::FunctionType *FTy = F->getFunctionType();
18671  llvm::Type *PTy = FTy->getParamType(0);
18672  Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
18673  return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
18674  }
18675  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18676  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18677  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18678  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18679  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18680  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18681  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18682  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18683  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18684  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18685  Intrinsic::ID IID;
18686  llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18687  switch (BuiltinID) {
18688  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18689  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18690  IID = Intrinsic::amdgcn_global_atomic_fadd;
18691  break;
18692  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18693  ArgTy = llvm::FixedVectorType::get(
18694  llvm::Type::getHalfTy(getLLVMContext()), 2);
18695  IID = Intrinsic::amdgcn_global_atomic_fadd;
18696  break;
18697  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18698  IID = Intrinsic::amdgcn_global_atomic_fadd;
18699  break;
18700  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18701  IID = Intrinsic::amdgcn_global_atomic_fmin;
18702  break;
18703  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18704  IID = Intrinsic::amdgcn_global_atomic_fmax;
18705  break;
18706  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18707  IID = Intrinsic::amdgcn_flat_atomic_fadd;
18708  break;
18709  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18710  IID = Intrinsic::amdgcn_flat_atomic_fmin;
18711  break;
18712  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18713  IID = Intrinsic::amdgcn_flat_atomic_fmax;
18714  break;
18715  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18716  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18717  IID = Intrinsic::amdgcn_flat_atomic_fadd;
18718  break;
18719  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18720  ArgTy = llvm::FixedVectorType::get(
18721  llvm::Type::getHalfTy(getLLVMContext()), 2);
18722  IID = Intrinsic::amdgcn_flat_atomic_fadd;
18723  break;
18724  }
18725  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18726  llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18727  llvm::Function *F =
18728  CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18729  return Builder.CreateCall(F, {Addr, Val});
18730  }
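// These legacy global/flat FP atomic intrinsics are mangled on three types:
// the data type, the pointer type, and the value type. A sketch for the
// f64 global-fadd case:
//
//   call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(
//       ptr addrspace(1) %addr, double %val)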
18731  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18732  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18733  Intrinsic::ID IID;
18734  switch (BuiltinID) {
18735  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18736  IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18737  break;
18738  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18739  IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18740  break;
18741  }
18742  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18743  llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18744  llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18745  return Builder.CreateCall(F, {Addr, Val});
18746  }
18747  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18748  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18749  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18750  Intrinsic::ID IID;
18751  llvm::Type *ArgTy;
18752  switch (BuiltinID) {
18753  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18754  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18755  IID = Intrinsic::amdgcn_ds_fadd;
18756  break;
18757  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18758  ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18759  IID = Intrinsic::amdgcn_ds_fadd;
18760  break;
18761  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18762  ArgTy = llvm::FixedVectorType::get(
18763  llvm::Type::getHalfTy(getLLVMContext()), 2);
18764  IID = Intrinsic::amdgcn_ds_fadd;
18765  break;
18766  }
18767  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18768  llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18769  llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18770  llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
18771  llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18772  llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
18773  llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
18774  return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18775  }
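// The legacy llvm.amdgcn.ds.fadd signature carries extra immediate operands
// after the pointer and value; this path pins them all to zero/false,
// roughly:
//
//   call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                       i32 0, i32 0, i1 false)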
18776  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18777  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18778  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18779  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: {
18780 
18781  Intrinsic::ID IID;
18782  switch (BuiltinID) {
18783  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18784  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18785  IID = Intrinsic::amdgcn_global_load_tr_b64;
18786  break;
18787  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18788  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18789  IID = Intrinsic::amdgcn_global_load_tr_b128;
18790  break;
18791  }
18792  llvm::Type *LoadTy = ConvertType(E->getType());
18793  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18794  llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18795  return Builder.CreateCall(F, {Addr});
18796  }
18797  case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18798  Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18799  {llvm::Type::getInt64Ty(getLLVMContext())});
18800  return Builder.CreateCall(F);
18801  }
18802  case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18803  Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18804  {llvm::Type::getInt64Ty(getLLVMContext())});
18805  llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18806  return Builder.CreateCall(F, {Env});
18807  }
18808  case AMDGPU::BI__builtin_amdgcn_read_exec:
18809  return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18810  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18811  return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18812  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18813  return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18814  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18815  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18816  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18817  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18818  llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18819  llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18820  llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18821  llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18822  llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18823  llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18824 
18825  // The builtins take these arguments as vec4 where the last element is
18826  // ignored. The intrinsic takes them as vec3.
18827  RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18828  ArrayRef<int>{0, 1, 2});
18829  RayDir =
18830  Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18831  RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18832  ArrayRef<int>{0, 1, 2});
18833 
18834  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18835  {NodePtr->getType(), RayDir->getType()});
18836  return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18837  RayInverseDir, TextureDescr});
18838  }
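// The shuffles above extract lanes {0,1,2}, dropping the ignored fourth
// component, e.g. for the ray origin:
//
//   %origin3 = shufflevector <4 x float> %origin, <4 x float> %origin,
//                            <3 x i32> <i32 0, i32 1, i32 2>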
18839 
18840  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18841  SmallVector<Value *, 6> Args;
18842  for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18843  Args.push_back(EmitScalarExpr(E->getArg(i)));
18844 
18845  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18846  Value *Call = Builder.CreateCall(F, Args);
18847  Value *Rtn = Builder.CreateExtractValue(Call, 0);
18848  Value *A = Builder.CreateExtractValue(Call, 1);
18849  llvm::Type *RetTy = ConvertType(E->getType());
18850  Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18851  (uint64_t)0);
18852  return Builder.CreateInsertElement(I0, A, 1);
18853  }
18854 
18855  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18856  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18857  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18858  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18859  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18860  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18861  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18862  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18863  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18864  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18865  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18866  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18867  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18868  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18869  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18870  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18871  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18872  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18873  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18874  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18875  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18876  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18877  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18878  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18879  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18880  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18881  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18882  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18883  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18884  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18885  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18886  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18887  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18888  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18889  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18890  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18891  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18892  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18893  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18894  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18895  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18896  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18897  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18898  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18899  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18900  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18901  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18902  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18903  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18904  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18905  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18906  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18907  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18908  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18909  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18910  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18911  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18912  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18913  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18914  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18915 
18916  // These operations perform a matrix multiplication and accumulation of
18917  // the form:
18918  // D = A * B + C
18919  // We need to specify one type for matrices AB and one for matrices CD.
18920  // Sparse matrix operations can have different types for A and B as well as
18921  // an additional type for the sparsity index.
18922  // The destination type should be put before the types used for source operands.
18923  SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18924  // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18925  // There is no need for the variable opsel argument, so always set it to
18926  // "false".
18927  bool AppendFalseForOpselArg = false;
18928  unsigned BuiltinWMMAOp;
18929 
18930  switch (BuiltinID) {
18931  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18932  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18933  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18934  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18935  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18936  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18937  break;
18938  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18939  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18940  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18941  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18942  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18943  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18944  break;
18945  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18946  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18947  AppendFalseForOpselArg = true;
18948  LLVM_FALLTHROUGH;
18949  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18950  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18951  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18952  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18953  break;
18954  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18955  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18956  AppendFalseForOpselArg = true;
18957  LLVM_FALLTHROUGH;
18958  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18959  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18960  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18961  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18962  break;
18963  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18964  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18965  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18966  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18967  break;
18968  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18969  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18970  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18971  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18972  break;
18973  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18974  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18975  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18976  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18977  ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18978  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18979  break;
18980  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18981  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18982  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18983  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18984  ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18985  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18986  break;
18987  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18988  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18989  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18990  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18991  break;
18992  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18993  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18994  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18995  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18996  break;
18997  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18998  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18999  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19000  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
19001  break;
19002  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19003  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19004  ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19005  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
19006  break;
19007  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19008  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19009  ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19010  BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
19011  break;
19012  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19013  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19014  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19015  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
19016  break;
19017  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19018  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19019  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19020  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
19021  break;
19022  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19023  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19024  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19025  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
19026  break;
19027  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19028  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19029  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19030  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
19031  break;
19032  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19033  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19034  ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19035  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
19036  break;
19037  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19038  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19039  ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19040  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
19041  break;
19042  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19043  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19044  ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19045  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
19046  break;
19047  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19048  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19049  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19050  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
19051  break;
19052  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19053  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19054  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19055  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
19056  break;
19057  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19058  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19059  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19060  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
19061  break;
19062  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19063  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
19064  ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19065  BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
19066  break;
19067  }
19068 
19069  SmallVector<Value *> Args;
19070  for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19071  Args.push_back(EmitScalarExpr(E->getArg(i)));
19072  if (AppendFalseForOpselArg)
19073  Args.push_back(Builder.getFalse());
19074 
19075  SmallVector<llvm::Type *, 6> ArgTypes;
19076  for (auto ArgIdx : ArgsForMatchingMatrixTypes)
19077  ArgTypes.push_back(Args[ArgIdx]->getType());
19078 
19079  Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
19080  return Builder.CreateCall(F, Args);
19081  }
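// The WMMA/SWMMAC intrinsics are overloaded, and ArgsForMatchingMatrixTypes
// records which call operands supply the overload types (destination type
// first, per the comment above). For {2, 0} on the f32/f16 wave32 variant
// this yields, as a sketch:
//
//   call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(
//       <16 x half> %a, <16 x half> %b, <8 x float> %c)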
19082 
19083  // amdgcn workitem
19084  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
19085  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
19086  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
19087  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
19088  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
19089  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
19090 
19091  // amdgcn workgroup size
19092  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
19093  return EmitAMDGPUWorkGroupSize(*this, 0);
19094  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19095  return EmitAMDGPUWorkGroupSize(*this, 1);
19096  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19097  return EmitAMDGPUWorkGroupSize(*this, 2);
19098 
19099  // amdgcn grid size
19100  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19101  return EmitAMDGPUGridSize(*this, 0);
19102  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19103  return EmitAMDGPUGridSize(*this, 1);
19104  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19105  return EmitAMDGPUGridSize(*this, 2);
19106 
19107  // r600 intrinsics
19108  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19109  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19110  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
19111  case AMDGPU::BI__builtin_r600_read_tidig_x:
19112  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19113  case AMDGPU::BI__builtin_r600_read_tidig_y:
19114  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19115  case AMDGPU::BI__builtin_r600_read_tidig_z:
19116  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19117  case AMDGPU::BI__builtin_amdgcn_alignbit: {
19118  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19119  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19120  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19121  Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19122  return Builder.CreateCall(F, { Src0, Src1, Src2 });
19123  }
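// alignbit is just a funnel shift right, so the generic llvm.fshr intrinsic
// is emitted instead of a target-specific one:
//
//   unsigned r(unsigned hi, unsigned lo, unsigned s) {
//     return __builtin_amdgcn_alignbit(hi, lo, s);
//   }
//   // => call i32 @llvm.fshr.i32(i32 %hi, i32 %lo, i32 %s)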
19124  case AMDGPU::BI__builtin_amdgcn_fence: {
19125  ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19126  EmitScalarExpr(E->getArg(1)), AO, SSID);
19127  return Builder.CreateFence(AO, SSID);
19128  }
19129  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19130  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19131  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19132  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
19133  llvm::AtomicRMWInst::BinOp BinOp;
19134  switch (BuiltinID) {
19135  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19136  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19137  BinOp = llvm::AtomicRMWInst::UIncWrap;
19138  break;
19139  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19140  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19141  BinOp = llvm::AtomicRMWInst::UDecWrap;
19142  break;
19143  }
19144 
19145  Address Ptr = CheckAtomicAlignment(*this, E);
19146  Value *Val = EmitScalarExpr(E->getArg(1));
19147 
19148  ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19149  EmitScalarExpr(E->getArg(3)), AO, SSID);
19150 
19151  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19152  bool Volatile =
19153  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19154 
19155  llvm::AtomicRMWInst *RMW =
19156  Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19157  if (Volatile)
19158  RMW->setVolatile(true);
19159  return RMW;
19160  }
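// These lower to ordinary atomicrmw instructions with the uinc_wrap /
// udec_wrap operations; ordering and scope come from the constant third and
// fourth builtin arguments, e.g. (a sketch):
//
//   %old = atomicrmw uinc_wrap ptr %p, i32 %v syncscope("agent") monotonic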
19161  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19162  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19163  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19164  llvm::Type *ResultType = ConvertType(E->getType());
19165  // s_sendmsg_rtn is mangled using the return type only.
19166  Function *F =
19167  CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19168  return Builder.CreateCall(F, {Arg});
19169  }
19170  default:
19171  return nullptr;
19172  }
19173 }
19174 
19175 /// Handle a SystemZ function in which the final argument is a pointer
19176 /// to an int that receives the post-instruction CC value. At the LLVM level
19177 /// this is represented as a function that returns a {result, cc} pair.
19178 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19179  unsigned IntrinsicID,
19180  const CallExpr *E) {
19181  unsigned NumArgs = E->getNumArgs() - 1;
19182  SmallVector<Value *, 8> Args(NumArgs);
19183  for (unsigned I = 0; I < NumArgs; ++I)
19184  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19185  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19186  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19187  Value *Call = CGF.Builder.CreateCall(F, Args);
19188  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19189  CGF.Builder.CreateStore(CC, CCPtr);
19190  return CGF.Builder.CreateExtractValue(Call, 0);
19191 }
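// Usage sketch: for a builtin such as __builtin_s390_vceqbs(a, b, &cc), the
// {result, cc} pair is unpacked so that element 1 lands in *cc and element 0
// becomes the builtin's value:
//
//   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %a,
//                                                     <16 x i8> %b)
//   %cc = extractvalue { <16 x i8>, i32 } %pair, 1
//   store i32 %cc, ptr %ccptr
//   %res = extractvalue { <16 x i8>, i32 } %pair, 0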
19192 
19193 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19194  const CallExpr *E) {
19195  switch (BuiltinID) {
19196  case SystemZ::BI__builtin_tbegin: {
19197  Value *TDB = EmitScalarExpr(E->getArg(0));
19198  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19199  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19200  return Builder.CreateCall(F, {TDB, Control});
19201  }
19202  case SystemZ::BI__builtin_tbegin_nofloat: {
19203  Value *TDB = EmitScalarExpr(E->getArg(0));
19204  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19205  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19206  return Builder.CreateCall(F, {TDB, Control});
19207  }
19208  case SystemZ::BI__builtin_tbeginc: {
19209  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19210  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19211  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19212  return Builder.CreateCall(F, {TDB, Control});
19213  }
19214  case SystemZ::BI__builtin_tabort: {
19215  Value *Data = EmitScalarExpr(E->getArg(0));
19216  Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19217  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19218  }
19219  case SystemZ::BI__builtin_non_tx_store: {
19220  Value *Address = EmitScalarExpr(E->getArg(0));
19221  Value *Data = EmitScalarExpr(E->getArg(1));
19222  Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19223  return Builder.CreateCall(F, {Data, Address});
19224  }
19225 
19226  // Vector builtins. Note that most vector builtins are mapped automatically
19227  // to target-specific LLVM intrinsics. The ones handled specially here can
19228  // be represented via standard LLVM IR, which is preferable to enable common
19229  // LLVM optimizations.
19230 
19231  case SystemZ::BI__builtin_s390_vpopctb:
19232  case SystemZ::BI__builtin_s390_vpopcth:
19233  case SystemZ::BI__builtin_s390_vpopctf:
19234  case SystemZ::BI__builtin_s390_vpopctg: {
19235  llvm::Type *ResultType = ConvertType(E->getType());
19236  Value *X = EmitScalarExpr(E->getArg(0));
19237  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19238  return Builder.CreateCall(F, X);
19239  }
19240 
19241  case SystemZ::BI__builtin_s390_vclzb:
19242  case SystemZ::BI__builtin_s390_vclzh:
19243  case SystemZ::BI__builtin_s390_vclzf:
19244  case SystemZ::BI__builtin_s390_vclzg: {
19245  llvm::Type *ResultType = ConvertType(E->getType());
19246  Value *X = EmitScalarExpr(E->getArg(0));
19247  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19248  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19249  return Builder.CreateCall(F, {X, Undef});
19250  }
19251 
19252  case SystemZ::BI__builtin_s390_vctzb:
19253  case SystemZ::BI__builtin_s390_vctzh:
19254  case SystemZ::BI__builtin_s390_vctzf:
19255  case SystemZ::BI__builtin_s390_vctzg: {
19256  llvm::Type *ResultType = ConvertType(E->getType());
19257  Value *X = EmitScalarExpr(E->getArg(0));
19258  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19259  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19260  return Builder.CreateCall(F, {X, Undef});
19261  }
19262 
19263  case SystemZ::BI__builtin_s390_verllb:
19264  case SystemZ::BI__builtin_s390_verllh:
19265  case SystemZ::BI__builtin_s390_verllf:
19266  case SystemZ::BI__builtin_s390_verllg: {
19267  llvm::Type *ResultType = ConvertType(E->getType());
19268  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19269  llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19270  // Splat scalar rotate amount to vector type.
19271  unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19272  Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19273  Amt = Builder.CreateVectorSplat(NumElts, Amt);
19274  Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19275  return Builder.CreateCall(F, { Src, Src, Amt });
19276  }
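// Rotate-left by a scalar amount is modeled as fshl(Src, Src, splat(Amt)).
// A sketch for the word-element variant:
//
//   %amt = ... vector splat of the zero-extended rotate count ...
//   %rot = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                          <4 x i32> %amt)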
19277 
19278  case SystemZ::BI__builtin_s390_verllvb:
19279  case SystemZ::BI__builtin_s390_verllvh:
19280  case SystemZ::BI__builtin_s390_verllvf:
19281  case SystemZ::BI__builtin_s390_verllvg: {
19282  llvm::Type *ResultType = ConvertType(E->getType());
19283  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19284  llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19285  Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19286  return Builder.CreateCall(F, { Src, Src, Amt });
19287  }
19288 
19289  case SystemZ::BI__builtin_s390_vfsqsb:
19290  case SystemZ::BI__builtin_s390_vfsqdb: {
19291  llvm::Type *ResultType = ConvertType(E->getType());
19292  Value *X = EmitScalarExpr(E->getArg(0));
19293  if (Builder.getIsFPConstrained()) {
19294  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19295  return Builder.CreateConstrainedFPCall(F, { X });
19296  } else {
19297  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19298  return Builder.CreateCall(F, X);
19299  }
19300  }
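// In strict-FP mode the constrained variant carries rounding and exception
// metadata; otherwise the plain intrinsic suffices, e.g.:
//
//   call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
//       <2 x double> %x, metadata !"round.dynamic",
//       metadata !"fpexcept.strict")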
19301  case SystemZ::BI__builtin_s390_vfmasb:
19302  case SystemZ::BI__builtin_s390_vfmadb: {
19303  llvm::Type *ResultType = ConvertType(E->getType());
19304  Value *X = EmitScalarExpr(E->getArg(0));
19305  Value *Y = EmitScalarExpr(E->getArg(1));
19306  Value *Z = EmitScalarExpr(E->getArg(2));
19307  if (Builder.getIsFPConstrained()) {
19308  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19309  return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19310  } else {
19311  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19312  return Builder.CreateCall(F, {X, Y, Z});
19313  }
19314  }
19315  case SystemZ::BI__builtin_s390_vfmssb:
19316  case SystemZ::BI__builtin_s390_vfmsdb: {
19317  llvm::Type *ResultType = ConvertType(E->getType());
19318  Value *X = EmitScalarExpr(E->getArg(0));
19319  Value *Y = EmitScalarExpr(E->getArg(1));
19320  Value *Z = EmitScalarExpr(E->getArg(2));
19321  if (Builder.getIsFPConstrained()) {
19322  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19323  return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19324  } else {
19325  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19326  return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19327  }
19328  }
19329  case SystemZ::BI__builtin_s390_vfnmasb:
19330  case SystemZ::BI__builtin_s390_vfnmadb: {
19331  llvm::Type *ResultType = ConvertType(E->getType());
19332  Value *X = EmitScalarExpr(E->getArg(0));
19333  Value *Y = EmitScalarExpr(E->getArg(1));
19334  Value *Z = EmitScalarExpr(E->getArg(2));
19335  if (Builder.getIsFPConstrained()) {
19336  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19337  return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19338  } else {
19339  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19340  return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19341  }
19342  }
19343  case SystemZ::BI__builtin_s390_vfnmssb:
19344  case SystemZ::BI__builtin_s390_vfnmsdb: {
19345  llvm::Type *ResultType = ConvertType(E->getType());
19346  Value *X = EmitScalarExpr(E->getArg(0));
19347  Value *Y = EmitScalarExpr(E->getArg(1));
19348  Value *Z = EmitScalarExpr(E->getArg(2));
19349  if (Builder.getIsFPConstrained()) {
19350  Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19351  Value *NegZ = Builder.CreateFNeg(Z, "sub");
19352  return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19353  } else {
19354  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19355  Value *NegZ = Builder.CreateFNeg(Z, "neg");
19356  return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19357  }
19358  }
19359  case SystemZ::BI__builtin_s390_vflpsb:
19360  case SystemZ::BI__builtin_s390_vflpdb: {
19361  llvm::Type *ResultType = ConvertType(E->getType());
19362  Value *X = EmitScalarExpr(E->getArg(0));
19363  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19364  return Builder.CreateCall(F, X);
19365  }
19366  case SystemZ::BI__builtin_s390_vflnsb:
19367  case SystemZ::BI__builtin_s390_vflndb: {
19368  llvm::Type *ResultType = ConvertType(E->getType());
19369  Value *X = EmitScalarExpr(E->getArg(0));
19370  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19371  return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19372  }
19373  case SystemZ::BI__builtin_s390_vfisb:
19374  case SystemZ::BI__builtin_s390_vfidb: {
19375  llvm::Type *ResultType = ConvertType(E->getType());
19376  Value *X = EmitScalarExpr(E->getArg(0));
19377  // Constant-fold the M4 and M5 mask arguments.
19378  llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19379  llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19380  // Check whether this instance can be represented via an LLVM standard
19381  // intrinsic. We only support some combinations of M4 and M5.
19382  Intrinsic::ID ID = Intrinsic::not_intrinsic;
19383  Intrinsic::ID CI;
19384  switch (M4.getZExtValue()) {
19385  default: break;
19386  case 0: // IEEE-inexact exception allowed
19387  switch (M5.getZExtValue()) {
19388  default: break;
19389  case 0: ID = Intrinsic::rint;
19390  CI = Intrinsic::experimental_constrained_rint; break;
19391  }
19392  break;
19393  case 4: // IEEE-inexact exception suppressed
19394  switch (M5.getZExtValue()) {
19395  default: break;
19396  case 0: ID = Intrinsic::nearbyint;
19397  CI = Intrinsic::experimental_constrained_nearbyint; break;
19398  case 1: ID = Intrinsic::round;
19399  CI = Intrinsic::experimental_constrained_round; break;
19400  case 5: ID = Intrinsic::trunc;
19401  CI = Intrinsic::experimental_constrained_trunc; break;
19402  case 6: ID = Intrinsic::ceil;
19403  CI = Intrinsic::experimental_constrained_ceil; break;
19404  case 7: ID = Intrinsic::floor;
19405  CI = Intrinsic::experimental_constrained_floor; break;
19406  }
19407  break;
19408  }
19409  if (ID != Intrinsic::not_intrinsic) {
19410  if (Builder.getIsFPConstrained()) {
19411  Function *F = CGM.getIntrinsic(CI, ResultType);
19412  return Builder.CreateConstrainedFPCall(F, X);
19413  } else {
19414  Function *F = CGM.getIntrinsic(ID, ResultType);
19415  return Builder.CreateCall(F, X);
19416  }
19417  }
19418  switch (BuiltinID) { // FIXME: constrained version?
19419  case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19420  case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19421  default: llvm_unreachable("Unknown BuiltinID");
19422  }
19423  Function *F = CGM.getIntrinsic(ID);
19424  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19425  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19426  return Builder.CreateCall(F, {X, M4Value, M5Value});
19427  }
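// Decoding example taken from the table above: M4=4 with M5=5 means
// "IEEE-inexact exception suppressed, round toward zero", i.e. llvm.trunc;
// any M4/M5 pair not listed falls back to the target-specific
// s390.vfisb/s390.vfidb intrinsics with the masks passed through as
// immediates.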
19428  case SystemZ::BI__builtin_s390_vfmaxsb:
19429  case SystemZ::BI__builtin_s390_vfmaxdb: {
19430  llvm::Type *ResultType = ConvertType(E->getType());
19431  Value *X = EmitScalarExpr(E->getArg(0));
19432  Value *Y = EmitScalarExpr(E->getArg(1));
19433  // Constant-fold the M4 mask argument.
19434  llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19435  // Check whether this instance can be represented via an LLVM standard
19436  // intrinsic. We only support some values of M4.
19437  Intrinsic::ID ID = Intrinsic::not_intrinsic;
19438  Intrinsic::ID CI;
19439  switch (M4.getZExtValue()) {
19440  default: break;
19441  case 4: ID = Intrinsic::maxnum;
19442  CI = Intrinsic::experimental_constrained_maxnum; break;
19443  }
19444  if (ID != Intrinsic::not_intrinsic) {
19445  if (Builder.getIsFPConstrained()) {
19446  Function *F = CGM.getIntrinsic(CI, ResultType);
19447  return Builder.CreateConstrainedFPCall(F, {X, Y});
19448  } else {
19449  Function *F = CGM.getIntrinsic(ID, ResultType);
19450  return Builder.CreateCall(F, {X, Y});
19451  }
19452  }
19453  switch (BuiltinID) {
19454  case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19455  case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19456  default: llvm_unreachable("Unknown BuiltinID");
19457  }
19458  Function *F = CGM.getIntrinsic(ID);
19459  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19460  return Builder.CreateCall(F, {X, Y, M4Value});
19461  }
19462  case SystemZ::BI__builtin_s390_vfminsb:
19463  case SystemZ::BI__builtin_s390_vfmindb: {
19464  llvm::Type *ResultType = ConvertType(E->getType());
19465  Value *X = EmitScalarExpr(E->getArg(0));
19466  Value *Y = EmitScalarExpr(E->getArg(1));
19467  // Constant-fold the M4 mask argument.
19468  llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19469  // Check whether this instance can be represented via an LLVM standard
19470  // intrinsic. We only support some values of M4.
19471  Intrinsic::ID ID = Intrinsic::not_intrinsic;
19472  Intrinsic::ID CI;
19473  switch (M4.getZExtValue()) {
19474  default: break;
19475  case 4: ID = Intrinsic::minnum;
19476  CI = Intrinsic::experimental_constrained_minnum; break;
19477  }
19478  if (ID != Intrinsic::not_intrinsic) {
19479  if (Builder.getIsFPConstrained()) {
19480  Function *F = CGM.getIntrinsic(CI, ResultType);
19481  return Builder.CreateConstrainedFPCall(F, {X, Y});
19482  } else {
19483  Function *F = CGM.getIntrinsic(ID, ResultType);
19484  return Builder.CreateCall(F, {X, Y});
19485  }
19486  }
19487  switch (BuiltinID) {
19488  case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19489  case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19490  default: llvm_unreachable("Unknown BuiltinID");
19491  }
19492  Function *F = CGM.getIntrinsic(ID);
19493  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19494  return Builder.CreateCall(F, {X, Y, M4Value});
19495  }
19496 
19497  case SystemZ::BI__builtin_s390_vlbrh:
19498  case SystemZ::BI__builtin_s390_vlbrf:
19499  case SystemZ::BI__builtin_s390_vlbrg: {
19500  llvm::Type *ResultType = ConvertType(E->getType());
19501  Value *X = EmitScalarExpr(E->getArg(0));
19502  Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19503  return Builder.CreateCall(F, X);
19504  }
19505 
19506  // Vector intrinsics that output the post-instruction CC value.
19507 
19508 #define INTRINSIC_WITH_CC(NAME) \
19509  case SystemZ::BI__builtin_##NAME: \
19510  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
19511 
19512  INTRINSIC_WITH_CC(s390_vpkshs);
19513  INTRINSIC_WITH_CC(s390_vpksfs);
19514  INTRINSIC_WITH_CC(s390_vpksgs);
19515 
19516  INTRINSIC_WITH_CC(s390_vpklshs);
19517  INTRINSIC_WITH_CC(s390_vpklsfs);
19518  INTRINSIC_WITH_CC(s390_vpklsgs);
19519 
19520  INTRINSIC_WITH_CC(s390_vceqbs);
19521  INTRINSIC_WITH_CC(s390_vceqhs);
19522  INTRINSIC_WITH_CC(s390_vceqfs);
19523  INTRINSIC_WITH_CC(s390_vceqgs);
19524 
19525  INTRINSIC_WITH_CC(s390_vchbs);
19526  INTRINSIC_WITH_CC(s390_vchhs);
19527  INTRINSIC_WITH_CC(s390_vchfs);
19528  INTRINSIC_WITH_CC(s390_vchgs);
19529 
19530  INTRINSIC_WITH_CC(s390_vchlbs);
19531  INTRINSIC_WITH_CC(s390_vchlhs);
19532  INTRINSIC_WITH_CC(s390_vchlfs);
19533  INTRINSIC_WITH_CC(s390_vchlgs);
19534 
19535  INTRINSIC_WITH_CC(s390_vfaebs);
19536  INTRINSIC_WITH_CC(s390_vfaehs);
19537  INTRINSIC_WITH_CC(s390_vfaefs);
19538 
19539  INTRINSIC_WITH_CC(s390_vfaezbs);
19540  INTRINSIC_WITH_CC(s390_vfaezhs);
19541  INTRINSIC_WITH_CC(s390_vfaezfs);
19542 
19543  INTRINSIC_WITH_CC(s390_vfeebs);
19544  INTRINSIC_WITH_CC(s390_vfeehs);
19545  INTRINSIC_WITH_CC(s390_vfeefs);
19546 
19547  INTRINSIC_WITH_CC(s390_vfeezbs);
19548  INTRINSIC_WITH_CC(s390_vfeezhs);
19549  INTRINSIC_WITH_CC(s390_vfeezfs);
19550 
19551  INTRINSIC_WITH_CC(s390_vfenebs);
19552  INTRINSIC_WITH_CC(s390_vfenehs);
19553  INTRINSIC_WITH_CC(s390_vfenefs);
19554 
19555  INTRINSIC_WITH_CC(s390_vfenezbs);
19556  INTRINSIC_WITH_CC(s390_vfenezhs);
19557  INTRINSIC_WITH_CC(s390_vfenezfs);
19558 
19559  INTRINSIC_WITH_CC(s390_vistrbs);
19560  INTRINSIC_WITH_CC(s390_vistrhs);
19561  INTRINSIC_WITH_CC(s390_vistrfs);
19562 
19563  INTRINSIC_WITH_CC(s390_vstrcbs);
19564  INTRINSIC_WITH_CC(s390_vstrchs);
19565  INTRINSIC_WITH_CC(s390_vstrcfs);
19566 
19567  INTRINSIC_WITH_CC(s390_vstrczbs);
19568  INTRINSIC_WITH_CC(s390_vstrczhs);
19569  INTRINSIC_WITH_CC(s390_vstrczfs);
19570 
19571  INTRINSIC_WITH_CC(s390_vfcesbs);
19572  INTRINSIC_WITH_CC(s390_vfcedbs);
19573  INTRINSIC_WITH_CC(s390_vfchsbs);
19574  INTRINSIC_WITH_CC(s390_vfchdbs);
19575  INTRINSIC_WITH_CC(s390_vfchesbs);
19576  INTRINSIC_WITH_CC(s390_vfchedbs);
19577 
19578  INTRINSIC_WITH_CC(s390_vftcisb);
19579  INTRINSIC_WITH_CC(s390_vftcidb);
19580 
19581  INTRINSIC_WITH_CC(s390_vstrsb);
19582  INTRINSIC_WITH_CC(s390_vstrsh);
19583  INTRINSIC_WITH_CC(s390_vstrsf);
19584 
19585  INTRINSIC_WITH_CC(s390_vstrszb);
19586  INTRINSIC_WITH_CC(s390_vstrszh);
19587  INTRINSIC_WITH_CC(s390_vstrszf);
19588 
19589 #undef INTRINSIC_WITH_CC
19590 
19591  default:
19592  return nullptr;
19593  }
19594 }
19595 
19596 namespace {
19597 // Helper classes for mapping MMA builtins to particular LLVM intrinsic variants.
19598 struct NVPTXMmaLdstInfo {
19599  unsigned NumResults; // Number of elements to load/store
19600  // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
19601  unsigned IID_col;
19602  unsigned IID_row;
19603 };
19604 
19605 #define MMA_INTR(geom_op_type, layout) \
19606  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19607 #define MMA_LDST(n, geom_op_type) \
19608  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
19609 
19610 static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19611  switch (BuiltinID) {
19612  // FP MMA loads
19613  case NVPTX::BI__hmma_m16n16k16_ld_a:
19614  return MMA_LDST(8, m16n16k16_load_a_f16);
19615  case NVPTX::BI__hmma_m16n16k16_ld_b:
19616  return MMA_LDST(8, m16n16k16_load_b_f16);
19617  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19618  return MMA_LDST(4, m16n16k16_load_c_f16);
19619  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19620  return MMA_LDST(8, m16n16k16_load_c_f32);
19621  case NVPTX::BI__hmma_m32n8k16_ld_a:
19622  return MMA_LDST(8, m32n8k16_load_a_f16);
19623  case NVPTX::BI__hmma_m32n8k16_ld_b:
19624  return MMA_LDST(8, m32n8k16_load_b_f16);
19625  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19626  return MMA_LDST(4, m32n8k16_load_c_f16);
19627  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19628  return MMA_LDST(8, m32n8k16_load_c_f32);
19629  case NVPTX::BI__hmma_m8n32k16_ld_a:
19630  return MMA_LDST(8, m8n32k16_load_a_f16);
19631  case NVPTX::BI__hmma_m8n32k16_ld_b:
19632  return MMA_LDST(8, m8n32k16_load_b_f16);
19633  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19634  return MMA_LDST(4, m8n32k16_load_c_f16);
19635  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19636  return MMA_LDST(8, m8n32k16_load_c_f32);
19637 
19638  // Integer MMA loads
19639  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19640  return MMA_LDST(2, m16n16k16_load_a_s8);
19641  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19642  return MMA_LDST(2, m16n16k16_load_a_u8);
19643  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19644  return MMA_LDST(2, m16n16k16_load_b_s8);
19645  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19646  return MMA_LDST(2, m16n16k16_load_b_u8);
19647  case NVPTX::BI__imma_m16n16k16_ld_c:
19648  return MMA_LDST(8, m16n16k16_load_c_s32);
19649  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19650  return MMA_LDST(4, m32n8k16_load_a_s8);
19651  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19652  return MMA_LDST(4, m32n8k16_load_a_u8);
19653  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19654  return MMA_LDST(1, m32n8k16_load_b_s8);
19655  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19656  return MMA_LDST(1, m32n8k16_load_b_u8);
19657  case NVPTX::BI__imma_m32n8k16_ld_c:
19658  return MMA_LDST(8, m32n8k16_load_c_s32);
19659  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19660  return MMA_LDST(1, m8n32k16_load_a_s8);
19661  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19662  return MMA_LDST(1, m8n32k16_load_a_u8);
19663  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19664  return MMA_LDST(4, m8n32k16_load_b_s8);
19665  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19666  return MMA_LDST(4, m8n32k16_load_b_u8);
19667  case NVPTX::BI__imma_m8n32k16_ld_c:
19668  return MMA_LDST(8, m8n32k16_load_c_s32);
19669 
19670  // Sub-integer MMA loads.
19671  // Only row/col layout is supported by A/B fragments.
19672  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19673  return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19674  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19675  return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19676  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19677  return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19678  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19679  return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19680  case NVPTX::BI__imma_m8n8k32_ld_c:
19681  return MMA_LDST(2, m8n8k32_load_c_s32);
19682  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19683  return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19684  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19685  return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19686  case NVPTX::BI__bmma_m8n8k128_ld_c:
19687  return MMA_LDST(2, m8n8k128_load_c_s32);
19688 
19689  // Double MMA loads
19690  case NVPTX::BI__dmma_m8n8k4_ld_a:
19691  return MMA_LDST(1, m8n8k4_load_a_f64);
19692  case NVPTX::BI__dmma_m8n8k4_ld_b:
19693  return MMA_LDST(1, m8n8k4_load_b_f64);
19694  case NVPTX::BI__dmma_m8n8k4_ld_c:
19695  return MMA_LDST(2, m8n8k4_load_c_f64);
19696 
19697  // Alternate float MMA loads
19698  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19699  return MMA_LDST(4, m16n16k16_load_a_bf16);
19700  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19701  return MMA_LDST(4, m16n16k16_load_b_bf16);
19702  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19703  return MMA_LDST(2, m8n32k16_load_a_bf16);
19704  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19705  return MMA_LDST(8, m8n32k16_load_b_bf16);
19706  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19707  return MMA_LDST(8, m32n8k16_load_a_bf16);
19708  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19709  return MMA_LDST(2, m32n8k16_load_b_bf16);
19710  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19711  return MMA_LDST(4, m16n16k8_load_a_tf32);
19712  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19713  return MMA_LDST(4, m16n16k8_load_b_tf32);
19714  case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19715  return MMA_LDST(8, m16n16k8_load_c_f32);
19716 
19717  // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19718  // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
19719  // use fragment C for both loads and stores.
19720  // FP MMA stores.
19721  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19722  return MMA_LDST(4, m16n16k16_store_d_f16);
19723  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19724  return MMA_LDST(8, m16n16k16_store_d_f32);
19725  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19726  return MMA_LDST(4, m32n8k16_store_d_f16);
19727  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19728  return MMA_LDST(8, m32n8k16_store_d_f32);
19729  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19730  return MMA_LDST(4, m8n32k16_store_d_f16);
19731  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19732  return MMA_LDST(8, m8n32k16_store_d_f32);
19733 
19734  // Integer and sub-integer MMA stores.
19735  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19736  // name, integer loads/stores use LLVM's i32.
19737  case NVPTX::BI__imma_m16n16k16_st_c_i32:
19738  return MMA_LDST(8, m16n16k16_store_d_s32);
19739  case NVPTX::BI__imma_m32n8k16_st_c_i32:
19740  return MMA_LDST(8, m32n8k16_store_d_s32);
19741  case NVPTX::BI__imma_m8n32k16_st_c_i32:
19742  return MMA_LDST(8, m8n32k16_store_d_s32);
19743  case NVPTX::BI__imma_m8n8k32_st_c_i32:
19744  return MMA_LDST(2, m8n8k32_store_d_s32);
19745  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19746  return MMA_LDST(2, m8n8k128_store_d_s32);
19747 
19748  // Double MMA store
19749  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19750  return MMA_LDST(2, m8n8k4_store_d_f64);
19751 
19752  // Alternate float MMA store
19753  case NVPTX::BI__mma_m16n16k8_st_c_f32:
19754  return MMA_LDST(8, m16n16k8_store_d_f32);
19755 
19756  default:
19757  llvm_unreachable("Unknown MMA builtin");
19758  }
19759 }
19760 #undef MMA_LDST
19761 #undef MMA_INTR
19762 
19763 
19764 struct NVPTXMmaInfo {
19765  unsigned NumEltsA;
19766  unsigned NumEltsB;
19767  unsigned NumEltsC;
19768  unsigned NumEltsD;
19769 
19770  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19771  // over 'col' for layout. The index of non-satf variants is expected to match
19772  // the undocumented layout constants used by CUDA's mma.hpp.
19773  std::array<unsigned, 8> Variants;
19774 
19775  unsigned getMMAIntrinsic(int Layout, bool Satf) {
19776  unsigned Index = Layout + 4 * Satf;
19777  if (Index >= Variants.size())
19778  return 0;
19779  return Variants[Index];
19780  }
19781 };
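 // Example: for __hmma_m16n16k16_mma_f16f16 with Layout == 1 (row-major A,
 // column-major B) and Satf == true, getMMAIntrinsic computes
 // Index = 1 + 4 * 1 == 5, selecting
 // Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f16_f16_satfinite from the
 // MMA_SATF_VARIANTS table defined below.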
19782 
19783 // Returns the intrinsic that matches Layout and Satf for valid
19784 // combinations of Layout and Satf, or 0 otherwise.
19785 static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19786  // clang-format off
19787 #define MMA_VARIANTS(geom, type) \
19788  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19789  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19790  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19791  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19792 #define MMA_SATF_VARIANTS(geom, type) \
19793  MMA_VARIANTS(geom, type), \
19794  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19795  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19796  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19797  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19798 // Sub-integer MMA only supports row.col layout.
19799 #define MMA_VARIANTS_I4(geom, type) \
19800  0, \
19801  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19802  0, \
19803  0, \
19804  0, \
19805  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19806  0, \
19807  0
19808 // b1 MMA does not support .satfinite.
19809 #define MMA_VARIANTS_B1_XOR(geom, type) \
19810  0, \
19811  Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19812  0, \
19813  0, \
19814  0, \
19815  0, \
19816  0, \
19817  0
19818 #define MMA_VARIANTS_B1_AND(geom, type) \
19819  0, \
19820  Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19821  0, \
19822  0, \
19823  0, \
19824  0, \
19825  0, \
19826  0
19827  // clang-format on
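 // The literal 0 slots in MMA_VARIANTS_I4 and MMA_VARIANTS_B1_* mark
 // Layout/Satf combinations that have no corresponding intrinsic;
 // getMMAIntrinsic returns 0 for those indices, which callers can check
 // for to reject an invalid variant.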
19828  switch (BuiltinID) {
19829  // FP MMA
19830  // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation,
19831  // while the NumEltsN fields of the return value are ordered as A,B,C,D.
19832  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19833  return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19834  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19835  return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19836  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19837  return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19838  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19839  return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19840  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19841  return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19842  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19843  return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19844  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19845  return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19846  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19847  return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19848  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19849  return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19850  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19851  return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19852  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19853  return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19854  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19855  return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19856 
19857  // Integer MMA
19858  case NVPTX::BI__imma_m16n16k16_mma_s8:
19859  return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19860  case NVPTX::BI__imma_m16n16k16_mma_u8:
19861  return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19862  case NVPTX::BI__imma_m32n8k16_mma_s8:
19863  return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19864  case NVPTX::BI__imma_m32n8k16_mma_u8:
19865  return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19866  case NVPTX::BI__imma_m8n32k16_mma_s8:
19867  return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19868  case NVPTX::BI__imma_m8n32k16_mma_u8:
19869  return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19870 
19871  // Sub-integer MMA
19872  case NVPTX::BI__imma_m8n8k32_mma_s4:
19873  return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19874  case NVPTX::BI__imma_m8n8k32_mma_u4:
19875  return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19876  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19877  return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19878  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19879  return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19880 
19881  // Double MMA
19882  case NVPTX::BI__dmma_m8n8k4_mma_f64:
19883  return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19884 
19885  // Alternate FP MMA
19886  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19887  return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19888  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19889  return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19890  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19891  return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19892  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19893  return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19894  default:
19895  llvm_unreachable("Unexpected builtin ID.");
19896  }
19897 #undef MMA_VARIANTS
19898 #undef MMA_SATF_VARIANTS
19899 #undef MMA_VARIANTS_I4
19900 #undef MMA_VARIANTS_B1_AND
19901 #undef MMA_VARIANTS_B1_XOR
19902 }
19903 
19904 static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19905  const CallExpr *E) {
19906  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19907  QualType ArgType = E->getArg(0)->getType();
19908  clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
19909  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
19910  return CGF.Builder.CreateCall(
19911  CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19912  {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
19913 }
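 // Example: __nvvm_ldg_f4 applied to a 'float4 const *' becomes a call to
 // the llvm.nvvm.ldg.global.f intrinsic overloaded on <4 x float>, with the
 // natural pointee alignment (16 == 4 * alignof(float), per the PTX
 // interoperability rule quoted further below) passed as the constant i32
 // second operand. (Illustrative; the mangled intrinsic name depends on the
 // overload types.)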
19914 
19915 static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19916  const CallExpr *E) {
19917  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19918  llvm::Type *ElemTy =
19919  CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19920  return CGF.Builder.CreateCall(
19921  CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19922  {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
19923 }
19924 
19925 static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19926  CodeGenFunction &CGF, const CallExpr *E,
19927  int SrcSize) {
19928  return E->getNumArgs() == 3
19929  ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
19930  {CGF.EmitScalarExpr(E->getArg(0)),
19931  CGF.EmitScalarExpr(E->getArg(1)),
19932  CGF.EmitScalarExpr(E->getArg(2))})
19933  : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
19934  {CGF.EmitScalarExpr(E->getArg(0)),
19935  CGF.EmitScalarExpr(E->getArg(1))});
19936 }
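 // When the source-level builtin is invoked with a third argument, that
 // argument is the cp.async "src-size" operand: the IntrinsicIDS ("-s")
 // flavor is selected, which copies src-size bytes and zero-fills the rest
 // of the SrcSize-byte transfer. With two arguments, the plain IntrinsicID
 // flavor copies all SrcSize bytes. (Descriptive note; see the PTX ISA
 // documentation of cp.async for the precise semantics.)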
19937 
19938 static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19939  const CallExpr *E, CodeGenFunction &CGF) {
19940  auto &C = CGF.CGM.getContext();
19941  if (!(C.getLangOpts().NativeHalfType ||
19942  !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19943  CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
19944  " requires native half type support.");
19945  return nullptr;
19946  }
19947 
19948  if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19949  IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19950  return MakeLdgLdu(IntrinsicID, CGF, E);
19951 
19952  SmallVector<Value *, 16> Args;
19953  auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
19954  auto *FTy = F->getFunctionType();
19955  unsigned ICEArguments = 0;
19956  ASTContext::GetBuiltinTypeError Error;
19957  C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
19958  assert(Error == ASTContext::GE_None && "Should not codegen an error");
19959  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19960  assert((ICEArguments & (1 << i)) == 0);
19961  auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
19962  auto *PTy = FTy->getParamType(i);
19963  if (PTy != ArgValue->getType())
19964  ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
19965  Args.push_back(ArgValue);
19966  }
19967 
19968  return CGF.Builder.CreateCall(F, Args);
19969 }
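 // The generic path above leans on the intrinsic's own signature: each
 // argument is emitted as a scalar and bitcast to the corresponding
 // parameter type when the representations differ (e.g. i16 vs. half), so
 // a single helper serves all of the fp16 builtins routed through it.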
19970 } // namespace
19971 
19972 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19973  const CallExpr *E) {
19974  auto MakeScopedLd = [&](unsigned IntrinsicID) {
19975  Value *Ptr = EmitScalarExpr(E->getArg(0));
19976  llvm::Type *ElemTy =
19977  ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19978  return Builder.CreateCall(
19979  CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), {Ptr});
19980  };
19981  auto MakeScopedSt = [&](unsigned IntrinsicID) {
19982  Value *Ptr = EmitScalarExpr(E->getArg(0));
19983  llvm::Type *ElemTy =
19984  ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19985  return Builder.CreateCall(
19986  CGM.getIntrinsic(IntrinsicID, {Ptr->getType(), ElemTy}),
19987  {Ptr, EmitScalarExpr(E->getArg(1))});
19988  };
19989  auto MakeScopedCasAtomic = [&](unsigned IntrinsicID) {
19990  Value *Ptr = EmitScalarExpr(E->getArg(0));
19991  llvm::Type *ElemTy =
19992  ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19993  return Builder.CreateCall(
19994  CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19995  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19996  };
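 // Note the overload order: MakeScopedLd and MakeScopedCasAtomic instantiate
 // the intrinsic as {ElemTy, PtrTy} while MakeScopedSt uses {PtrTy, ElemTy},
 // which must match how the underlying nvvm load/store intrinsics declare
 // their overloaded types.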
19997  switch (BuiltinID) {
19998 
19999 #define LD_VOLATILE_CASES(ADDR_SPACE) \
20000  case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_i: \
20001  case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_l: \
20002  case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_ll: \
20003  return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i_volatile); \
20004  case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_f: \
20005  case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_d: \
20006  return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f_volatile);
20007 
20008 #define LD_CASES(ORDER, SCOPE, ADDR_SPACE) \
20009  case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i: \
20010  case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l: \
20011  case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll: \
20012  return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i##ORDER##SCOPE); \
20013  case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f: \
20014  case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d: \
20015  return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f##ORDER##SCOPE);
20016 
20017 #define LD_CASES_AS(ORDER, SCOPE) \
20018  LD_CASES(ORDER, SCOPE, _gen) \
20019  LD_CASES(ORDER, SCOPE, _global) \
20020  LD_CASES(ORDER, SCOPE, _shared)
20021 
20022 #define LD_CASES_AS_SCOPES(ORDER) \
20023  LD_CASES_AS(ORDER, ) \
20024  LD_CASES_AS(ORDER, _cta) \
20025  LD_CASES_AS(ORDER, _sys)
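 // Expansion sketch, derived from the macros above: LD_CASES_AS_SCOPES(_acquire)
 // covers, among others, NVPTX::BI__nvvm_acquire_ld_gen_i (lowered to
 // Intrinsic::nvvm_ld_gen_i_acquire) and NVPTX::BI__nvvm_acquire_cta_ld_global_f
 // (lowered to Intrinsic::nvvm_ld_global_f_acquire_cta).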
20026 
20028  LD_CASES_AS_SCOPES(_acquire)
20029  LD_VOLATILE_CASES(_gen)
20030  LD_VOLATILE_CASES(_global)
20031  LD_VOLATILE_CASES(_shared)
20032 
20033 #undef LD_VOLATILE_CASES
20034 #undef LD_CASES
20035 #undef LD_CASES_AS
20036 #undef LD_CASES_AS_SCOPES
20037 
20038 #define ST_VOLATILE_CASES(ADDR_SPACE) \
20039  case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_i: \
20040  case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_l: \
20041  case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_ll: \
20042  return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i_volatile); \
20043  case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_f: \
20044  case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_d: \
20045  return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f_volatile);
20046 
20047 #define ST_CASES(ORDER, SCOPE, ADDR_SPACE) \
20048  case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i: \
20049  case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l: \
20050  case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll: \
20051  return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i##ORDER##SCOPE); \
20052  case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f: \
20053  case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d: \
20054  return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f##ORDER##SCOPE);
20055 
20056 #define ST_CASES_AS(ORDER, SCOPE) \
20057  ST_CASES(ORDER, SCOPE, _gen) \
20058  ST_CASES(ORDER, SCOPE, _global) \
20059  ST_CASES(ORDER, SCOPE, _shared)
20060 
20061 #define ST_CASES_AS_SCOPES(ORDER) \
20062  ST_CASES_AS(ORDER, ) \
20063  ST_CASES_AS(ORDER, _cta) \
20064  ST_CASES_AS(ORDER, _sys)
20065 
20067  ST_CASES_AS_SCOPES(_release)
20068  ST_VOLATILE_CASES(_gen)
20069  ST_VOLATILE_CASES(_global)
20070  ST_VOLATILE_CASES(_shared)
20071 
20072 #undef ST_VOLATILE_CASES
20073 #undef ST_CASES
20074 #undef ST_CASES_AS
20075 #undef ST_CASES_AS_SCOPES
20076 
20077  case NVPTX::BI__nvvm_atom_add_gen_i:
20078  case NVPTX::BI__nvvm_atom_add_gen_l:
20079  case NVPTX::BI__nvvm_atom_add_gen_ll:
20080  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
20081 
20082  case NVPTX::BI__nvvm_atom_sub_gen_i:
20083  case NVPTX::BI__nvvm_atom_sub_gen_l:
20084  case NVPTX::BI__nvvm_atom_sub_gen_ll:
20085  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
20086 
20087  case NVPTX::BI__nvvm_atom_and_gen_i:
20088  case NVPTX::BI__nvvm_atom_and_gen_l:
20089  case NVPTX::BI__nvvm_atom_and_gen_ll:
20090  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
20091 
20092  case NVPTX::BI__nvvm_atom_or_gen_i:
20093  case NVPTX::BI__nvvm_atom_or_gen_l:
20094  case NVPTX::BI__nvvm_atom_or_gen_ll:
20095  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
20096 
20097  case NVPTX::BI__nvvm_atom_xor_gen_i:
20098  case NVPTX::BI__nvvm_atom_xor_gen_l:
20099  case NVPTX::BI__nvvm_atom_xor_gen_ll:
20100  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
20101 
20102  case NVPTX::BI__nvvm_atom_xchg_gen_i:
20103  case NVPTX::BI__nvvm_atom_xchg_gen_l:
20104  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
20105  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
20106 
20107  case NVPTX::BI__nvvm_atom_max_gen_i:
20108  case NVPTX::BI__nvvm_atom_max_gen_l:
20109  case NVPTX::BI__nvvm_atom_max_gen_ll:
20110  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
20111 
20112  case NVPTX::BI__nvvm_atom_max_gen_ui:
20113  case NVPTX::BI__nvvm_atom_max_gen_ul:
20114  case NVPTX::BI__nvvm_atom_max_gen_ull:
20115  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
20116 
20117  case NVPTX::BI__nvvm_atom_min_gen_i:
20118  case NVPTX::BI__nvvm_atom_min_gen_l:
20119  case NVPTX::BI__nvvm_atom_min_gen_ll:
20120  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
20121 
20122  case NVPTX::BI__nvvm_atom_min_gen_ui:
20123  case NVPTX::BI__nvvm_atom_min_gen_ul:
20124  case NVPTX::BI__nvvm_atom_min_gen_ull:
20125  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
20126 
20127  case NVPTX::BI__nvvm_atom_cas_gen_i:
20128  case NVPTX::BI__nvvm_atom_cas_gen_l:
20129  case NVPTX::BI__nvvm_atom_cas_gen_ll:
20130  // __nvvm_atom_cas_gen_* should return the old value rather than the
20131  // success flag.
20132  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
20133 
20134  case NVPTX::BI__nvvm_atom_add_gen_f:
20135  case NVPTX::BI__nvvm_atom_add_gen_d: {
20136  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
20137  Value *Val = EmitScalarExpr(E->getArg(1));
20138 
20139  return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
20140  AtomicOrdering::SequentiallyConsistent);
20141  }
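 // Unlike the integer 'gen' atomics above, which go through
 // MakeBinaryAtomicValue, the floating-point add builtins lower directly to
 // an LLVM 'atomicrmw fadd' instruction with seq_cst ordering rather than to
 // an nvvm intrinsic.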
20142 
20143  case NVPTX::BI__nvvm_atom_xchg_gen_f:
20144  case NVPTX::BI__nvvm_atom_xchg_gen_d:
20145  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f, *this, E);
20146 
20147  case NVPTX::BI__nvvm_atom_cas_gen_f:
20148  case NVPTX::BI__nvvm_atom_cas_gen_d:
20149  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f);
20150 
20151  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
20152  Value *Ptr = EmitScalarExpr(E->getArg(0));
20153  Value *Val = EmitScalarExpr(E->getArg(1));
20154  Function *FnALI32 =
20155  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
20156  return Builder.CreateCall(FnALI32, {Ptr, Val});
20157  }
20158 
20159  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
20160  Value *Ptr = EmitScalarExpr(E->getArg(0));
20161  Value *Val = EmitScalarExpr(E->getArg(1));
20162  Function *FnALD32 =
20163  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
20164  return Builder.CreateCall(FnALD32, {Ptr, Val});
20165  }
20166 
20167  case NVPTX::BI__nvvm_ldg_c:
20168  case NVPTX::BI__nvvm_ldg_sc:
20169  case NVPTX::BI__nvvm_ldg_c2:
20170  case NVPTX::BI__nvvm_ldg_sc2:
20171  case NVPTX::BI__nvvm_ldg_c4:
20172  case NVPTX::BI__nvvm_ldg_sc4:
20173  case NVPTX::BI__nvvm_ldg_s:
20174  case NVPTX::BI__nvvm_ldg_s2:
20175  case NVPTX::BI__nvvm_ldg_s4:
20176  case NVPTX::BI__nvvm_ldg_i:
20177  case NVPTX::BI__nvvm_ldg_i2:
20178  case NVPTX::BI__nvvm_ldg_i4:
20179  case NVPTX::BI__nvvm_ldg_l:
20180  case NVPTX::BI__nvvm_ldg_l2:
20181  case NVPTX::BI__nvvm_ldg_ll:
20182  case NVPTX::BI__nvvm_ldg_ll2:
20183  case NVPTX::BI__nvvm_ldg_uc:
20184  case NVPTX::BI__nvvm_ldg_uc2:
20185  case NVPTX::BI__nvvm_ldg_uc4:
20186  case NVPTX::BI__nvvm_ldg_us:
20187  case NVPTX::BI__nvvm_ldg_us2:
20188  case NVPTX::BI__nvvm_ldg_us4:
20189  case NVPTX::BI__nvvm_ldg_ui:
20190  case NVPTX::BI__nvvm_ldg_ui2:
20191  case NVPTX::BI__nvvm_ldg_ui4:
20192  case NVPTX::BI__nvvm_ldg_ul:
20193  case NVPTX::BI__nvvm_ldg_ul2:
20194  case NVPTX::BI__nvvm_ldg_ull:
20195  case NVPTX::BI__nvvm_ldg_ull2:
20196  // PTX Interoperability section 2.2: "For a vector with an even number of
20197  // elements, its alignment is set to number of elements times the alignment
20198  // of its member: n*alignof(t)."
20199  return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
20200  case NVPTX::BI__nvvm_ldg_f:
20201  case NVPTX::BI__nvvm_ldg_f2:
20202  case NVPTX::BI__nvvm_ldg_f4:
20203  case NVPTX::BI__nvvm_ldg_d:
20204  case NVPTX::BI__nvvm_ldg_d2:
20205  return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20206 
20207  case NVPTX::BI__nvvm_ldu_c:
20208  case NVPTX::BI__nvvm_ldu_sc:
20209  case NVPTX::BI__nvvm_ldu_c2:
20210  case NVPTX::BI__nvvm_ldu_sc2:
20211  case NVPTX::BI__nvvm_ldu_c4:
20212  case NVPTX::BI__nvvm_ldu_sc4:
20213  case NVPTX::BI__nvvm_ldu_s:
20214  case NVPTX::BI__nvvm_ldu_s2:
20215  case NVPTX::BI__nvvm_ldu_s4:
20216  case NVPTX::BI__nvvm_ldu_i:
20217  case NVPTX::BI__nvvm_ldu_i2:
20218  case NVPTX::BI__nvvm_ldu_i4:
20219  case NVPTX::BI__nvvm_ldu_l:
20220  case NVPTX::BI__nvvm_ldu_l2:
20221  case NVPTX::BI__nvvm_ldu_ll:
20222  case NVPTX::BI__nvvm_ldu_ll2:
20223  case NVPTX::BI__nvvm_ldu_uc:
20224  case NVPTX::BI__nvvm_ldu_uc2:
20225  case NVPTX::BI__nvvm_ldu_uc4:
20226  case NVPTX::BI__nvvm_ldu_us:
20227  case NVPTX::BI__nvvm_ldu_us2:
20228  case NVPTX::BI__nvvm_ldu_us4:
20229  case NVPTX::BI__nvvm_ldu_ui:
20230  case NVPTX::BI__nvvm_ldu_ui2:
20231  case NVPTX::BI__nvvm_ldu_ui4:
20232  case NVPTX::BI__nvvm_ldu_ul:
20233  case NVPTX::BI__nvvm_ldu_ul2:
20234  case NVPTX::BI__nvvm_ldu_ull:
20235  case NVPTX::BI__nvvm_ldu_ull2:
20236  return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20237  case NVPTX::BI__nvvm_ldu_f:
20238  case NVPTX::BI__nvvm_ldu_f2:
20239  case NVPTX::BI__nvvm_ldu_f4:
20240  case NVPTX::BI__nvvm_ldu_d:
20241  case NVPTX::BI__nvvm_ldu_d2:
20242  return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20243 
20244  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20245  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20246  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20247  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20248  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20249  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20250  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20251  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20252  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20253  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20254  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20255  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20256  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20257  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20258  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20259  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20260  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20261  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20262  case NVPTX::BI__nvvm_atom_cta_xchg_gen_f:
20263  case NVPTX::BI__nvvm_atom_cta_xchg_gen_d:
20264  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_cta, *this, E);
20265  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20266  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20267  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20268  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20269  case NVPTX::BI__nvvm_atom_sys_xchg_gen_f:
20270  case NVPTX::BI__nvvm_atom_sys_xchg_gen_d:
20271  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_sys, *this, E);
20272  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20273  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20274  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20275  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20276  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20277  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20278  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20279  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_cta, *this, E);
20280  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20281  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20282  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20283  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20284  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20285  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20286  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20287  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_sys, *this, E);
20288  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20289  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20290  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20291  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20292  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20293  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20294  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20295  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_cta, *this, E);
20296  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20297  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20298  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20299  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20300  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20301  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20302  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20303  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_sys, *this, E);
20304  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20305  case NVPTX::BI__nvvm_atom_cta_inc_gen_ul:
20306  case NVPTX::BI__nvvm_atom_cta_inc_gen_ull:
20307  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20308  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20309  case NVPTX::BI__nvvm_atom_cta_dec_gen_ul:
20310  case NVPTX::BI__nvvm_atom_cta_dec_gen_ull:
20311  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20312  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20313  case NVPTX::BI__nvvm_atom_sys_inc_gen_ul:
20314  case NVPTX::BI__nvvm_atom_sys_inc_gen_ull:
20315  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20316  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20317  case NVPTX::BI__nvvm_atom_sys_dec_gen_ul:
20318  case NVPTX::BI__nvvm_atom_sys_dec_gen_ull:
20319  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20320  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20321  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20322  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20323  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20324  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20325  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20326  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20327  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20328  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20329  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20330  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20331  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20332  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20333  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20334  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20335  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20336  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20337  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20338  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20339  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20340  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20341  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20342  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20343  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20344  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20345  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20346  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll:
20347  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_cta);
20348  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20349  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20350  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll:
20351  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_sys);
20352  case NVPTX::BI__nvvm_atom_cta_cas_gen_f:
20353  case NVPTX::BI__nvvm_atom_cta_cas_gen_d:
20354  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_cta);
20355  case NVPTX::BI__nvvm_atom_sys_cas_gen_f:
20356  case NVPTX::BI__nvvm_atom_sys_cas_gen_d:
20357  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_sys);
20358  case NVPTX::BI__nvvm_atom_acquire_add_gen_i:
20359  case NVPTX::BI__nvvm_atom_acquire_add_gen_l:
20360  case NVPTX::BI__nvvm_atom_acquire_add_gen_ll:
20361  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acquire, *this, E);
20362  case NVPTX::BI__nvvm_atom_acquire_add_gen_f:
20363  case NVPTX::BI__nvvm_atom_acquire_add_gen_d:
20364  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acquire, *this, E);
20365  case NVPTX::BI__nvvm_atom_acquire_xchg_gen_i:
20366  case NVPTX::BI__nvvm_atom_acquire_xchg_gen_l:
20367  case NVPTX::BI__nvvm_atom_acquire_xchg_gen_ll:
20368  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acquire, *this, E);
20369  case NVPTX::BI__nvvm_atom_acquire_xchg_gen_f:
20370  case NVPTX::BI__nvvm_atom_acquire_xchg_gen_d:
20371  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acquire, *this, E);
20372  case NVPTX::BI__nvvm_atom_acquire_max_gen_i:
20373  case NVPTX::BI__nvvm_atom_acquire_max_gen_l:
20374  case NVPTX::BI__nvvm_atom_acquire_max_gen_ll:
20375  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acquire, *this, E);
20376  case NVPTX::BI__nvvm_atom_acquire_max_gen_ui:
20377  case NVPTX::BI__nvvm_atom_acquire_max_gen_ul:
20378  case NVPTX::BI__nvvm_atom_acquire_max_gen_ull:
20379  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acquire, *this, E);
20380  case NVPTX::BI__nvvm_atom_acquire_min_gen_i:
20381  case NVPTX::BI__nvvm_atom_acquire_min_gen_l:
20382  case NVPTX::BI__nvvm_atom_acquire_min_gen_ll:
20383  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acquire, *this, E);
20384  case NVPTX::BI__nvvm_atom_acquire_min_gen_ui:
20385  case NVPTX::BI__nvvm_atom_acquire_min_gen_ul:
20386  case NVPTX::BI__nvvm_atom_acquire_min_gen_ull:
20387  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acquire, *this, E);
20388  case NVPTX::BI__nvvm_atom_acquire_inc_gen_ui:
20389  case NVPTX::BI__nvvm_atom_acquire_inc_gen_ul:
20390  case NVPTX::BI__nvvm_atom_acquire_inc_gen_ull:
20391  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acquire, *this, E);
20392  case NVPTX::BI__nvvm_atom_acquire_dec_gen_ui:
20393  case NVPTX::BI__nvvm_atom_acquire_dec_gen_ul:
20394  case NVPTX::BI__nvvm_atom_acquire_dec_gen_ull:
20395  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acquire, *this, E);
20396  case NVPTX::BI__nvvm_atom_acquire_and_gen_i:
20397  case NVPTX::BI__nvvm_atom_acquire_and_gen_l:
20398  case NVPTX::BI__nvvm_atom_acquire_and_gen_ll:
20399  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acquire, *this, E);
20400  case NVPTX::BI__nvvm_atom_acquire_or_gen_i:
20401  case NVPTX::BI__nvvm_atom_acquire_or_gen_l:
20402  case NVPTX::BI__nvvm_atom_acquire_or_gen_ll:
20403  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acquire, *this, E);
20404  case NVPTX::BI__nvvm_atom_acquire_xor_gen_i:
20405  case NVPTX::BI__nvvm_atom_acquire_xor_gen_l:
20406  case NVPTX::BI__nvvm_atom_acquire_xor_gen_ll:
20407  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acquire, *this, E);
20408  case NVPTX::BI__nvvm_atom_acquire_cas_gen_i:
20409  case NVPTX::BI__nvvm_atom_acquire_cas_gen_l:
20410  case NVPTX::BI__nvvm_atom_acquire_cas_gen_ll:
20411  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acquire);
20412  case NVPTX::BI__nvvm_atom_acquire_cas_gen_f:
20413  case NVPTX::BI__nvvm_atom_acquire_cas_gen_d:
20414  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acquire);
20415  case NVPTX::BI__nvvm_atom_acquire_cta_add_gen_i:
20416  case NVPTX::BI__nvvm_atom_acquire_cta_add_gen_l:
20417  case NVPTX::BI__nvvm_atom_acquire_cta_add_gen_ll:
20418  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acquire_cta, *this, E);
20419  case NVPTX::BI__nvvm_atom_acquire_sys_add_gen_i:
20420  case NVPTX::BI__nvvm_atom_acquire_sys_add_gen_l:
20421  case NVPTX::BI__nvvm_atom_acquire_sys_add_gen_ll:
20422  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acquire_sys, *this, E);
20423  case NVPTX::BI__nvvm_atom_acquire_cta_add_gen_f:
20424  case NVPTX::BI__nvvm_atom_acquire_cta_add_gen_d:
20425  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acquire_cta, *this, E);
20426  case NVPTX::BI__nvvm_atom_acquire_sys_add_gen_f:
20427  case NVPTX::BI__nvvm_atom_acquire_sys_add_gen_d:
20428  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acquire_sys, *this, E);
20429  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_gen_i:
20430  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_gen_l:
20431  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_gen_ll:
20432  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acquire_cta, *this, E);
20433  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_gen_f:
20434  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_gen_d:
20435  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acquire_cta, *this, E);
20436  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_gen_i:
20437  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_gen_l:
20438  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_gen_ll:
20439  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acquire_sys, *this, E);
20440  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_gen_f:
20441  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_gen_d:
20442  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acquire_sys, *this, E);
20443  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_i:
20444  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_l:
20445  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_ll:
20446  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acquire_cta, *this, E);
20447  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_ui:
20448  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_ul:
20449  case NVPTX::BI__nvvm_atom_acquire_cta_max_gen_ull:
20450  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acquire_cta, *this, E);
20451  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_i:
20452  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_l:
20453  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_ll:
20454  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acquire_sys, *this, E);
20455  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_ui:
20456  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_ul:
20457  case NVPTX::BI__nvvm_atom_acquire_sys_max_gen_ull:
20458  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acquire_sys, *this, E);
20459  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_i:
20460  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_l:
20461  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_ll:
20462  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acquire_cta, *this, E);
20463  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_ui:
20464  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_ul:
20465  case NVPTX::BI__nvvm_atom_acquire_cta_min_gen_ull:
20466  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acquire_cta, *this, E);
20467  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_i:
20468  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_l:
20469  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_ll:
20470  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acquire_sys, *this, E);
20471  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_ui:
20472  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_ul:
20473  case NVPTX::BI__nvvm_atom_acquire_sys_min_gen_ull:
20474  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acquire_sys, *this, E);
20475  case NVPTX::BI__nvvm_atom_acquire_cta_inc_gen_ui:
20476  case NVPTX::BI__nvvm_atom_acquire_cta_inc_gen_ul:
20477  case NVPTX::BI__nvvm_atom_acquire_cta_inc_gen_ull:
20478  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acquire_cta, *this, E);
20479  case NVPTX::BI__nvvm_atom_acquire_cta_dec_gen_ui:
20480  case NVPTX::BI__nvvm_atom_acquire_cta_dec_gen_ul:
20481  case NVPTX::BI__nvvm_atom_acquire_cta_dec_gen_ull:
20482  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acquire_cta, *this, E);
20483  case NVPTX::BI__nvvm_atom_acquire_sys_inc_gen_ui:
20484  case NVPTX::BI__nvvm_atom_acquire_sys_inc_gen_ul:
20485  case NVPTX::BI__nvvm_atom_acquire_sys_inc_gen_ull:
20486  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acquire_sys, *this, E);
20487  case NVPTX::BI__nvvm_atom_acquire_sys_dec_gen_ui:
20488  case NVPTX::BI__nvvm_atom_acquire_sys_dec_gen_ul:
20489  case NVPTX::BI__nvvm_atom_acquire_sys_dec_gen_ull:
20490  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acquire_sys, *this, E);
20491  case NVPTX::BI__nvvm_atom_acquire_cta_and_gen_i:
20492  case NVPTX::BI__nvvm_atom_acquire_cta_and_gen_l:
20493  case NVPTX::BI__nvvm_atom_acquire_cta_and_gen_ll:
20494  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acquire_cta, *this, E);
20495  case NVPTX::BI__nvvm_atom_acquire_sys_and_gen_i:
20496  case NVPTX::BI__nvvm_atom_acquire_sys_and_gen_l:
20497  case NVPTX::BI__nvvm_atom_acquire_sys_and_gen_ll:
20498  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acquire_sys, *this, E);
20499  case NVPTX::BI__nvvm_atom_acquire_cta_or_gen_i:
20500  case NVPTX::BI__nvvm_atom_acquire_cta_or_gen_l:
20501  case NVPTX::BI__nvvm_atom_acquire_cta_or_gen_ll:
20502  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acquire_cta, *this, E);
20503  case NVPTX::BI__nvvm_atom_acquire_sys_or_gen_i:
20504  case NVPTX::BI__nvvm_atom_acquire_sys_or_gen_l:
20505  case NVPTX::BI__nvvm_atom_acquire_sys_or_gen_ll:
20506  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acquire_sys, *this, E);
20507  case NVPTX::BI__nvvm_atom_acquire_cta_xor_gen_i:
20508  case NVPTX::BI__nvvm_atom_acquire_cta_xor_gen_l:
20509  case NVPTX::BI__nvvm_atom_acquire_cta_xor_gen_ll:
20510  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acquire_cta, *this, E);
20511  case NVPTX::BI__nvvm_atom_acquire_sys_xor_gen_i:
20512  case NVPTX::BI__nvvm_atom_acquire_sys_xor_gen_l:
20513  case NVPTX::BI__nvvm_atom_acquire_sys_xor_gen_ll:
20514  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acquire_sys, *this, E);
20515  case NVPTX::BI__nvvm_atom_acquire_cta_cas_gen_i:
20516  case NVPTX::BI__nvvm_atom_acquire_cta_cas_gen_l:
20517  case NVPTX::BI__nvvm_atom_acquire_cta_cas_gen_ll:
20518  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acquire_cta);
20519  case NVPTX::BI__nvvm_atom_acquire_sys_cas_gen_i:
20520  case NVPTX::BI__nvvm_atom_acquire_sys_cas_gen_l:
20521  case NVPTX::BI__nvvm_atom_acquire_sys_cas_gen_ll:
20522  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acquire_sys);
20523  case NVPTX::BI__nvvm_atom_acquire_cta_cas_gen_f:
20524  case NVPTX::BI__nvvm_atom_acquire_cta_cas_gen_d:
20525  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acquire_cta);
20526  case NVPTX::BI__nvvm_atom_acquire_sys_cas_gen_f:
20527  case NVPTX::BI__nvvm_atom_acquire_sys_cas_gen_d:
20528  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acquire_sys);
20529  case NVPTX::BI__nvvm_atom_release_add_gen_i:
20530  case NVPTX::BI__nvvm_atom_release_add_gen_l:
20531  case NVPTX::BI__nvvm_atom_release_add_gen_ll:
20532  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_release, *this, E);
20533  case NVPTX::BI__nvvm_atom_release_add_gen_f:
20534  case NVPTX::BI__nvvm_atom_release_add_gen_d:
20535  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_release, *this, E);
20536  case NVPTX::BI__nvvm_atom_release_xchg_gen_i:
20537  case NVPTX::BI__nvvm_atom_release_xchg_gen_l:
20538  case NVPTX::BI__nvvm_atom_release_xchg_gen_ll:
20539  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_release, *this, E);
20540  case NVPTX::BI__nvvm_atom_release_xchg_gen_f:
20541  case NVPTX::BI__nvvm_atom_release_xchg_gen_d:
20542  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_release, *this, E);
20543  case NVPTX::BI__nvvm_atom_release_max_gen_i:
20544  case NVPTX::BI__nvvm_atom_release_max_gen_l:
20545  case NVPTX::BI__nvvm_atom_release_max_gen_ll:
20546  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_release, *this, E);
20547  case NVPTX::BI__nvvm_atom_release_max_gen_ui:
20548  case NVPTX::BI__nvvm_atom_release_max_gen_ul:
20549  case NVPTX::BI__nvvm_atom_release_max_gen_ull:
20550  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_release, *this, E);
20551  case NVPTX::BI__nvvm_atom_release_min_gen_i:
20552  case NVPTX::BI__nvvm_atom_release_min_gen_l:
20553  case NVPTX::BI__nvvm_atom_release_min_gen_ll:
20554  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_release, *this, E);
20555  case NVPTX::BI__nvvm_atom_release_min_gen_ui:
20556  case NVPTX::BI__nvvm_atom_release_min_gen_ul:
20557  case NVPTX::BI__nvvm_atom_release_min_gen_ull:
20558  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_release, *this, E);
20559  case NVPTX::BI__nvvm_atom_release_inc_gen_ui:
20560  case NVPTX::BI__nvvm_atom_release_inc_gen_ul:
20561  case NVPTX::BI__nvvm_atom_release_inc_gen_ull:
20562  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_release, *this, E);
20563  case NVPTX::BI__nvvm_atom_release_dec_gen_ui:
20564  case NVPTX::BI__nvvm_atom_release_dec_gen_ul:
20565  case NVPTX::BI__nvvm_atom_release_dec_gen_ull:
20566  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_release, *this, E);
20567  case NVPTX::BI__nvvm_atom_release_and_gen_i:
20568  case NVPTX::BI__nvvm_atom_release_and_gen_l:
20569  case NVPTX::BI__nvvm_atom_release_and_gen_ll:
20570  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_release, *this, E);
20571  case NVPTX::BI__nvvm_atom_release_or_gen_i:
20572  case NVPTX::BI__nvvm_atom_release_or_gen_l:
20573  case NVPTX::BI__nvvm_atom_release_or_gen_ll:
20574  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_release, *this, E);
20575  case NVPTX::BI__nvvm_atom_release_xor_gen_i:
20576  case NVPTX::BI__nvvm_atom_release_xor_gen_l:
20577  case NVPTX::BI__nvvm_atom_release_xor_gen_ll:
20578  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_release, *this, E);
20579  case NVPTX::BI__nvvm_atom_release_cas_gen_i:
20580  case NVPTX::BI__nvvm_atom_release_cas_gen_l:
20581  case NVPTX::BI__nvvm_atom_release_cas_gen_ll:
20582  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_release);
20583  case NVPTX::BI__nvvm_atom_release_cas_gen_f:
20584  case NVPTX::BI__nvvm_atom_release_cas_gen_d:
20585  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_release);
20586  case NVPTX::BI__nvvm_atom_release_cta_add_gen_i:
20587  case NVPTX::BI__nvvm_atom_release_cta_add_gen_l:
20588  case NVPTX::BI__nvvm_atom_release_cta_add_gen_ll:
20589  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_release_cta, *this, E);
20590  case NVPTX::BI__nvvm_atom_release_sys_add_gen_i:
20591  case NVPTX::BI__nvvm_atom_release_sys_add_gen_l:
20592  case NVPTX::BI__nvvm_atom_release_sys_add_gen_ll:
20593  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_release_sys, *this, E);
20594  case NVPTX::BI__nvvm_atom_release_cta_add_gen_f:
20595  case NVPTX::BI__nvvm_atom_release_cta_add_gen_d:
20596  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_release_cta, *this, E);
20597  case NVPTX::BI__nvvm_atom_release_sys_add_gen_f:
20598  case NVPTX::BI__nvvm_atom_release_sys_add_gen_d:
20599  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_release_sys, *this, E);
20600  case NVPTX::BI__nvvm_atom_release_cta_xchg_gen_i:
20601  case NVPTX::BI__nvvm_atom_release_cta_xchg_gen_l:
20602  case NVPTX::BI__nvvm_atom_release_cta_xchg_gen_ll:
20603  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_release_cta, *this, E);
20604  case NVPTX::BI__nvvm_atom_release_cta_xchg_gen_f:
20605  case NVPTX::BI__nvvm_atom_release_cta_xchg_gen_d:
20606  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_release_cta, *this, E);
20607  case NVPTX::BI__nvvm_atom_release_sys_xchg_gen_i:
20608  case NVPTX::BI__nvvm_atom_release_sys_xchg_gen_l:
20609  case NVPTX::BI__nvvm_atom_release_sys_xchg_gen_ll:
20610  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_release_sys, *this, E);
20611  case NVPTX::BI__nvvm_atom_release_sys_xchg_gen_f:
20612  case NVPTX::BI__nvvm_atom_release_sys_xchg_gen_d:
20613  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_release_sys, *this, E);
20614  case NVPTX::BI__nvvm_atom_release_cta_max_gen_i:
20615  case NVPTX::BI__nvvm_atom_release_cta_max_gen_l:
20616  case NVPTX::BI__nvvm_atom_release_cta_max_gen_ll:
20617  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_release_cta, *this, E);
20618  case NVPTX::BI__nvvm_atom_release_cta_max_gen_ui:
20619  case NVPTX::BI__nvvm_atom_release_cta_max_gen_ul:
20620  case NVPTX::BI__nvvm_atom_release_cta_max_gen_ull:
20621  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_release_cta, *this, E);
20622  case NVPTX::BI__nvvm_atom_release_sys_max_gen_i:
20623  case NVPTX::BI__nvvm_atom_release_sys_max_gen_l:
20624  case NVPTX::BI__nvvm_atom_release_sys_max_gen_ll:
20625  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_release_sys, *this, E);
20626  case NVPTX::BI__nvvm_atom_release_sys_max_gen_ui:
20627  case NVPTX::BI__nvvm_atom_release_sys_max_gen_ul:
20628  case NVPTX::BI__nvvm_atom_release_sys_max_gen_ull:
20629  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_release_sys, *this, E);
20630  case NVPTX::BI__nvvm_atom_release_cta_min_gen_i:
20631  case NVPTX::BI__nvvm_atom_release_cta_min_gen_l:
20632  case NVPTX::BI__nvvm_atom_release_cta_min_gen_ll:
20633  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_release_cta, *this, E);
20634  case NVPTX::BI__nvvm_atom_release_cta_min_gen_ui:
20635  case NVPTX::BI__nvvm_atom_release_cta_min_gen_ul:
20636  case NVPTX::BI__nvvm_atom_release_cta_min_gen_ull:
20637  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_release_cta, *this, E);
20638  case NVPTX::BI__nvvm_atom_release_sys_min_gen_i:
20639  case NVPTX::BI__nvvm_atom_release_sys_min_gen_l:
20640  case NVPTX::BI__nvvm_atom_release_sys_min_gen_ll:
20641  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_release_sys, *this, E);
20642  case NVPTX::BI__nvvm_atom_release_sys_min_gen_ui:
20643  case NVPTX::BI__nvvm_atom_release_sys_min_gen_ul:
20644  case NVPTX::BI__nvvm_atom_release_sys_min_gen_ull:
20645  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_release_sys, *this, E);
20646  case NVPTX::BI__nvvm_atom_release_cta_inc_gen_ui:
20647  case NVPTX::BI__nvvm_atom_release_cta_inc_gen_ul:
20648  case NVPTX::BI__nvvm_atom_release_cta_inc_gen_ull:
20649  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_release_cta, *this, E);
20650  case NVPTX::BI__nvvm_atom_release_cta_dec_gen_ui:
20651  case NVPTX::BI__nvvm_atom_release_cta_dec_gen_ul:
20652  case NVPTX::BI__nvvm_atom_release_cta_dec_gen_ull:
20653  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_release_cta, *this, E);
20654  case NVPTX::BI__nvvm_atom_release_sys_inc_gen_ui:
20655  case NVPTX::BI__nvvm_atom_release_sys_inc_gen_ul:
20656  case NVPTX::BI__nvvm_atom_release_sys_inc_gen_ull:
20657  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_release_sys, *this, E);
20658  case NVPTX::BI__nvvm_atom_release_sys_dec_gen_ui:
20659  case NVPTX::BI__nvvm_atom_release_sys_dec_gen_ul:
20660  case NVPTX::BI__nvvm_atom_release_sys_dec_gen_ull:
20661  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_release_sys, *this, E);
20662  case NVPTX::BI__nvvm_atom_release_cta_and_gen_i:
20663  case NVPTX::BI__nvvm_atom_release_cta_and_gen_l:
20664  case NVPTX::BI__nvvm_atom_release_cta_and_gen_ll:
20665  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_release_cta, *this, E);
20666  case NVPTX::BI__nvvm_atom_release_sys_and_gen_i:
20667  case NVPTX::BI__nvvm_atom_release_sys_and_gen_l:
20668  case NVPTX::BI__nvvm_atom_release_sys_and_gen_ll:
20669  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_release_sys, *this, E);
20670  case NVPTX::BI__nvvm_atom_release_cta_or_gen_i:
20671  case NVPTX::BI__nvvm_atom_release_cta_or_gen_l:
20672  case NVPTX::BI__nvvm_atom_release_cta_or_gen_ll:
20673  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_release_cta, *this, E);
20674  case NVPTX::BI__nvvm_atom_release_sys_or_gen_i:
20675  case NVPTX::BI__nvvm_atom_release_sys_or_gen_l:
20676  case NVPTX::BI__nvvm_atom_release_sys_or_gen_ll:
20677  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_release_sys, *this, E);
20678  case NVPTX::BI__nvvm_atom_release_cta_xor_gen_i:
20679  case NVPTX::BI__nvvm_atom_release_cta_xor_gen_l:
20680  case NVPTX::BI__nvvm_atom_release_cta_xor_gen_ll:
20681  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_release_cta, *this, E);
20682  case NVPTX::BI__nvvm_atom_release_sys_xor_gen_i:
20683  case NVPTX::BI__nvvm_atom_release_sys_xor_gen_l:
20684  case NVPTX::BI__nvvm_atom_release_sys_xor_gen_ll:
20685  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_release_sys, *this, E);
20686  case NVPTX::BI__nvvm_atom_release_cta_cas_gen_i:
20687  case NVPTX::BI__nvvm_atom_release_cta_cas_gen_l:
20688  case NVPTX::BI__nvvm_atom_release_cta_cas_gen_ll:
20689  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_release_cta);
20690  case NVPTX::BI__nvvm_atom_release_sys_cas_gen_i:
20691  case NVPTX::BI__nvvm_atom_release_sys_cas_gen_l:
20692  case NVPTX::BI__nvvm_atom_release_sys_cas_gen_ll:
20693  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_release_sys);
20694  case NVPTX::BI__nvvm_atom_release_cta_cas_gen_f:
20695  case NVPTX::BI__nvvm_atom_release_cta_cas_gen_d:
20696  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_release_cta);
20697  case NVPTX::BI__nvvm_atom_release_sys_cas_gen_f:
20698  case NVPTX::BI__nvvm_atom_release_sys_cas_gen_d:
20699  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_release_sys);
20700  case NVPTX::BI__nvvm_atom_acq_rel_add_gen_i:
20701  case NVPTX::BI__nvvm_atom_acq_rel_add_gen_l:
20702  case NVPTX::BI__nvvm_atom_acq_rel_add_gen_ll:
20703  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acq_rel, *this, E);
20704  case NVPTX::BI__nvvm_atom_acq_rel_add_gen_f:
20705  case NVPTX::BI__nvvm_atom_acq_rel_add_gen_d:
20706  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acq_rel, *this, E);
20707  case NVPTX::BI__nvvm_atom_acq_rel_xchg_gen_i:
20708  case NVPTX::BI__nvvm_atom_acq_rel_xchg_gen_l:
20709  case NVPTX::BI__nvvm_atom_acq_rel_xchg_gen_ll:
20710  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acq_rel, *this, E);
20711  case NVPTX::BI__nvvm_atom_acq_rel_xchg_gen_f:
20712  case NVPTX::BI__nvvm_atom_acq_rel_xchg_gen_d:
20713  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acq_rel, *this, E);
20714  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_i:
20715  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_l:
20716  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_ll:
20717  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acq_rel, *this, E);
20718  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_ui:
20719  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_ul:
20720  case NVPTX::BI__nvvm_atom_acq_rel_max_gen_ull:
20721  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acq_rel, *this, E);
20722  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_i:
20723  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_l:
20724  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_ll:
20725  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acq_rel, *this, E);
20726  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_ui:
20727  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_ul:
20728  case NVPTX::BI__nvvm_atom_acq_rel_min_gen_ull:
20729  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acq_rel, *this, E);
20730  case NVPTX::BI__nvvm_atom_acq_rel_inc_gen_ui:
20731  case NVPTX::BI__nvvm_atom_acq_rel_inc_gen_ul:
20732  case NVPTX::BI__nvvm_atom_acq_rel_inc_gen_ull:
20733  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acq_rel, *this, E);
20734  case NVPTX::BI__nvvm_atom_acq_rel_dec_gen_ui:
20735  case NVPTX::BI__nvvm_atom_acq_rel_dec_gen_ul:
20736  case NVPTX::BI__nvvm_atom_acq_rel_dec_gen_ull:
20737  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acq_rel, *this, E);
20738  case NVPTX::BI__nvvm_atom_acq_rel_and_gen_i:
20739  case NVPTX::BI__nvvm_atom_acq_rel_and_gen_l:
20740  case NVPTX::BI__nvvm_atom_acq_rel_and_gen_ll:
20741  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acq_rel, *this, E);
20742  case NVPTX::BI__nvvm_atom_acq_rel_or_gen_i:
20743  case NVPTX::BI__nvvm_atom_acq_rel_or_gen_l:
20744  case NVPTX::BI__nvvm_atom_acq_rel_or_gen_ll:
20745  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acq_rel, *this, E);
20746  case NVPTX::BI__nvvm_atom_acq_rel_xor_gen_i:
20747  case NVPTX::BI__nvvm_atom_acq_rel_xor_gen_l:
20748  case NVPTX::BI__nvvm_atom_acq_rel_xor_gen_ll:
20749  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acq_rel, *this, E);
20750  case NVPTX::BI__nvvm_atom_acq_rel_cas_gen_i:
20751  case NVPTX::BI__nvvm_atom_acq_rel_cas_gen_l:
20752  case NVPTX::BI__nvvm_atom_acq_rel_cas_gen_ll:
20753  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acq_rel);
20754  case NVPTX::BI__nvvm_atom_acq_rel_cas_gen_f:
20755  case NVPTX::BI__nvvm_atom_acq_rel_cas_gen_d:
20756  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acq_rel);
20757  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_gen_i:
20758  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_gen_l:
20759  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_gen_ll:
20760  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acq_rel_cta, *this, E);
20761  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_gen_i:
20762  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_gen_l:
20763  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_gen_ll:
20764  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_acq_rel_sys, *this, E);
20765  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_gen_f:
20766  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_gen_d:
20767  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acq_rel_cta, *this, E);
20768  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_gen_f:
20769  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_gen_d:
20770  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_acq_rel_sys, *this, E);
20771  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_gen_i:
20772  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_gen_l:
20773  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_gen_ll:
20774  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acq_rel_cta, *this, E);
20775  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_gen_f:
20776  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_gen_d:
20777  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acq_rel_cta, *this, E);
20778  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_gen_i:
20779  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_gen_l:
20780  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_gen_ll:
20781  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_acq_rel_sys, *this, E);
20782  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_gen_f:
20783  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_gen_d:
20784  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_f_acq_rel_sys, *this, E);
20785  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_i:
20786  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_l:
20787  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_ll:
20788  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acq_rel_cta, *this, E);
20789  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_ui:
20790  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_ul:
20791  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_gen_ull:
20792  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acq_rel_cta, *this, E);
20793  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_i:
20794  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_l:
20795  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_ll:
20796  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_acq_rel_sys, *this, E);
20797  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_ui:
20798  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_ul:
20799  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_gen_ull:
20800  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_ui_acq_rel_sys, *this, E);
20801  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_i:
20802  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_l:
20803  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_ll:
20804  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acq_rel_cta, *this, E);
20805  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_ui:
20806  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_ul:
20807  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_gen_ull:
20808  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acq_rel_cta, *this, E);
20809  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_i:
20810  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_l:
20811  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_ll:
20812  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_acq_rel_sys, *this, E);
20813  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_ui:
20814  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_ul:
20815  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_gen_ull:
20816  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_ui_acq_rel_sys, *this, E);
20817  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_gen_ui:
20818  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_gen_ul:
20819  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_gen_ull:
20820  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acq_rel_cta, *this, E);
20821  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_gen_ui:
20822  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_gen_ul:
20823  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_gen_ull:
20824  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acq_rel_cta, *this, E);
20825  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_gen_ui:
20826  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_gen_ul:
20827  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_gen_ull:
20828  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_acq_rel_sys, *this, E);
20829  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_gen_ui:
20830  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_gen_ul:
20831  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_gen_ull:
20832  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_acq_rel_sys, *this, E);
20833  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_gen_i:
20834  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_gen_l:
20835  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_gen_ll:
20836  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acq_rel_cta, *this, E);
20837  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_gen_i:
20838  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_gen_l:
20839  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_gen_ll:
20840  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_acq_rel_sys, *this, E);
20841  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_gen_i:
20842  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_gen_l:
20843  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_gen_ll:
20844  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acq_rel_cta, *this, E);
20845  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_gen_i:
20846  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_gen_l:
20847  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_gen_ll:
20848  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_acq_rel_sys, *this, E);
20849  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_gen_i:
20850  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_gen_l:
20851  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_gen_ll:
20852  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acq_rel_cta, *this, E);
20853  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_gen_i:
20854  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_gen_l:
20855  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_gen_ll:
20856  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_acq_rel_sys, *this, E);
20857  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_gen_i:
20858  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_gen_l:
20859  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_gen_ll:
20860  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acq_rel_cta);
20861  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_gen_i:
20862  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_gen_l:
20863  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_gen_ll:
20864  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_i_acq_rel_sys);
20865  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_gen_f:
20866  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_gen_d:
20867  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acq_rel_cta);
20868  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_gen_f:
20869  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_gen_d:
20870  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_gen_f_acq_rel_sys);
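  // Atomics on the global address space.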
20871  case NVPTX::BI__nvvm_atom_add_global_i:
20872  case NVPTX::BI__nvvm_atom_add_global_l:
20873  case NVPTX::BI__nvvm_atom_add_global_ll:
20874  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i, *this, E);
20875  case NVPTX::BI__nvvm_atom_add_global_f:
20876  case NVPTX::BI__nvvm_atom_add_global_d:
20877  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f, *this, E);
20878  case NVPTX::BI__nvvm_atom_xchg_global_i:
20879  case NVPTX::BI__nvvm_atom_xchg_global_l:
20880  case NVPTX::BI__nvvm_atom_xchg_global_ll:
20881  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i, *this, E);
20882  case NVPTX::BI__nvvm_atom_xchg_global_f:
20883  case NVPTX::BI__nvvm_atom_xchg_global_d:
20884  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f, *this, E);
20885  case NVPTX::BI__nvvm_atom_max_global_i:
20886  case NVPTX::BI__nvvm_atom_max_global_l:
20887  case NVPTX::BI__nvvm_atom_max_global_ll:
20888  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i, *this, E);
20889  case NVPTX::BI__nvvm_atom_max_global_ui:
20890  case NVPTX::BI__nvvm_atom_max_global_ul:
20891  case NVPTX::BI__nvvm_atom_max_global_ull:
20892  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui, *this, E);
20893  case NVPTX::BI__nvvm_atom_min_global_i:
20894  case NVPTX::BI__nvvm_atom_min_global_l:
20895  case NVPTX::BI__nvvm_atom_min_global_ll:
20896  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i, *this, E);
20897  case NVPTX::BI__nvvm_atom_min_global_ui:
20898  case NVPTX::BI__nvvm_atom_min_global_ul:
20899  case NVPTX::BI__nvvm_atom_min_global_ull:
20900  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui, *this, E);
20901  case NVPTX::BI__nvvm_atom_inc_global_ui:
20902  case NVPTX::BI__nvvm_atom_inc_global_ul:
20903  case NVPTX::BI__nvvm_atom_inc_global_ull:
20904  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i, *this, E);
20905  case NVPTX::BI__nvvm_atom_dec_global_ui:
20906  case NVPTX::BI__nvvm_atom_dec_global_ul:
20907  case NVPTX::BI__nvvm_atom_dec_global_ull:
20908  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i, *this, E);
20909  case NVPTX::BI__nvvm_atom_and_global_i:
20910  case NVPTX::BI__nvvm_atom_and_global_l:
20911  case NVPTX::BI__nvvm_atom_and_global_ll:
20912  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i, *this, E);
20913  case NVPTX::BI__nvvm_atom_or_global_i:
20914  case NVPTX::BI__nvvm_atom_or_global_l:
20915  case NVPTX::BI__nvvm_atom_or_global_ll:
20916  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i, *this, E);
20917  case NVPTX::BI__nvvm_atom_xor_global_i:
20918  case NVPTX::BI__nvvm_atom_xor_global_l:
20919  case NVPTX::BI__nvvm_atom_xor_global_ll:
20920  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i, *this, E);
20921  case NVPTX::BI__nvvm_atom_cas_global_i:
20922  case NVPTX::BI__nvvm_atom_cas_global_l:
20923  case NVPTX::BI__nvvm_atom_cas_global_ll:
20924  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i);
20925  case NVPTX::BI__nvvm_atom_cas_global_f:
20926  case NVPTX::BI__nvvm_atom_cas_global_d:
20927  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f);
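  // Scoped (cta/sys) atomics on the global address space.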
20928  case NVPTX::BI__nvvm_atom_cta_add_global_i:
20929  case NVPTX::BI__nvvm_atom_cta_add_global_l:
20930  case NVPTX::BI__nvvm_atom_cta_add_global_ll:
20931  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_cta, *this, E);
20932  case NVPTX::BI__nvvm_atom_sys_add_global_i:
20933  case NVPTX::BI__nvvm_atom_sys_add_global_l:
20934  case NVPTX::BI__nvvm_atom_sys_add_global_ll:
20935  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_sys, *this, E);
20936  case NVPTX::BI__nvvm_atom_cta_add_global_f:
20937  case NVPTX::BI__nvvm_atom_cta_add_global_d:
20938  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_cta, *this, E);
20939  case NVPTX::BI__nvvm_atom_sys_add_global_f:
20940  case NVPTX::BI__nvvm_atom_sys_add_global_d:
20941  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_sys, *this, E);
20942  case NVPTX::BI__nvvm_atom_cta_xchg_global_i:
20943  case NVPTX::BI__nvvm_atom_cta_xchg_global_l:
20944  case NVPTX::BI__nvvm_atom_cta_xchg_global_ll:
20945  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_cta, *this, E);
20946  case NVPTX::BI__nvvm_atom_cta_xchg_global_f:
20947  case NVPTX::BI__nvvm_atom_cta_xchg_global_d:
20948  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_cta, *this, E);
20949  case NVPTX::BI__nvvm_atom_sys_xchg_global_i:
20950  case NVPTX::BI__nvvm_atom_sys_xchg_global_l:
20951  case NVPTX::BI__nvvm_atom_sys_xchg_global_ll:
20952  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_sys, *this, E);
20953  case NVPTX::BI__nvvm_atom_sys_xchg_global_f:
20954  case NVPTX::BI__nvvm_atom_sys_xchg_global_d:
20955  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_sys, *this, E);
20956  case NVPTX::BI__nvvm_atom_cta_max_global_i:
20957  case NVPTX::BI__nvvm_atom_cta_max_global_l:
20958  case NVPTX::BI__nvvm_atom_cta_max_global_ll:
20959  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_cta, *this, E);
20960  case NVPTX::BI__nvvm_atom_cta_max_global_ui:
20961  case NVPTX::BI__nvvm_atom_cta_max_global_ul:
20962  case NVPTX::BI__nvvm_atom_cta_max_global_ull:
20963  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_cta, *this, E);
20964  case NVPTX::BI__nvvm_atom_sys_max_global_i:
20965  case NVPTX::BI__nvvm_atom_sys_max_global_l:
20966  case NVPTX::BI__nvvm_atom_sys_max_global_ll:
20967  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_sys, *this, E);
20968  case NVPTX::BI__nvvm_atom_sys_max_global_ui:
20969  case NVPTX::BI__nvvm_atom_sys_max_global_ul:
20970  case NVPTX::BI__nvvm_atom_sys_max_global_ull:
20971  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_sys, *this, E);
20972  case NVPTX::BI__nvvm_atom_cta_min_global_i:
20973  case NVPTX::BI__nvvm_atom_cta_min_global_l:
20974  case NVPTX::BI__nvvm_atom_cta_min_global_ll:
20975  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_cta, *this, E);
20976  case NVPTX::BI__nvvm_atom_cta_min_global_ui:
20977  case NVPTX::BI__nvvm_atom_cta_min_global_ul:
20978  case NVPTX::BI__nvvm_atom_cta_min_global_ull:
20979  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_cta, *this, E);
20980  case NVPTX::BI__nvvm_atom_sys_min_global_i:
20981  case NVPTX::BI__nvvm_atom_sys_min_global_l:
20982  case NVPTX::BI__nvvm_atom_sys_min_global_ll:
20983  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_sys, *this, E);
20984  case NVPTX::BI__nvvm_atom_sys_min_global_ui:
20985  case NVPTX::BI__nvvm_atom_sys_min_global_ul:
20986  case NVPTX::BI__nvvm_atom_sys_min_global_ull:
20987  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_sys, *this, E);
20988  case NVPTX::BI__nvvm_atom_cta_inc_global_ui:
20989  case NVPTX::BI__nvvm_atom_cta_inc_global_ul:
20990  case NVPTX::BI__nvvm_atom_cta_inc_global_ull:
20991  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_cta, *this, E);
20992  case NVPTX::BI__nvvm_atom_cta_dec_global_ui:
20993  case NVPTX::BI__nvvm_atom_cta_dec_global_ul:
20994  case NVPTX::BI__nvvm_atom_cta_dec_global_ull:
20995  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_cta, *this, E);
20996  case NVPTX::BI__nvvm_atom_sys_inc_global_ui:
20997  case NVPTX::BI__nvvm_atom_sys_inc_global_ul:
20998  case NVPTX::BI__nvvm_atom_sys_inc_global_ull:
20999  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_sys, *this, E);
21000  case NVPTX::BI__nvvm_atom_sys_dec_global_ui:
21001  case NVPTX::BI__nvvm_atom_sys_dec_global_ul:
21002  case NVPTX::BI__nvvm_atom_sys_dec_global_ull:
21003  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_sys, *this, E);
21004  case NVPTX::BI__nvvm_atom_cta_and_global_i:
21005  case NVPTX::BI__nvvm_atom_cta_and_global_l:
21006  case NVPTX::BI__nvvm_atom_cta_and_global_ll:
21007  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_cta, *this, E);
21008  case NVPTX::BI__nvvm_atom_sys_and_global_i:
21009  case NVPTX::BI__nvvm_atom_sys_and_global_l:
21010  case NVPTX::BI__nvvm_atom_sys_and_global_ll:
21011  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_sys, *this, E);
21012  case NVPTX::BI__nvvm_atom_cta_or_global_i:
21013  case NVPTX::BI__nvvm_atom_cta_or_global_l:
21014  case NVPTX::BI__nvvm_atom_cta_or_global_ll:
21015  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_cta, *this, E);
21016  case NVPTX::BI__nvvm_atom_sys_or_global_i:
21017  case NVPTX::BI__nvvm_atom_sys_or_global_l:
21018  case NVPTX::BI__nvvm_atom_sys_or_global_ll:
21019  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_sys, *this, E);
21020  case NVPTX::BI__nvvm_atom_cta_xor_global_i:
21021  case NVPTX::BI__nvvm_atom_cta_xor_global_l:
21022  case NVPTX::BI__nvvm_atom_cta_xor_global_ll:
21023  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_cta, *this, E);
21024  case NVPTX::BI__nvvm_atom_sys_xor_global_i:
21025  case NVPTX::BI__nvvm_atom_sys_xor_global_l:
21026  case NVPTX::BI__nvvm_atom_sys_xor_global_ll:
21027  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_sys, *this, E);
21028  case NVPTX::BI__nvvm_atom_cta_cas_global_i:
21029  case NVPTX::BI__nvvm_atom_cta_cas_global_l:
21030  case NVPTX::BI__nvvm_atom_cta_cas_global_ll:
21031  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_cta);
21032  case NVPTX::BI__nvvm_atom_sys_cas_global_i:
21033  case NVPTX::BI__nvvm_atom_sys_cas_global_l:
21034  case NVPTX::BI__nvvm_atom_sys_cas_global_ll:
21035  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_sys);
21036  case NVPTX::BI__nvvm_atom_cta_cas_global_f:
21037  case NVPTX::BI__nvvm_atom_cta_cas_global_d:
21038  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_cta);
21039  case NVPTX::BI__nvvm_atom_sys_cas_global_f:
21040  case NVPTX::BI__nvvm_atom_sys_cas_global_d:
21041  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_sys);
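  // Acquire-ordered atomics on the global address space.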
21042  case NVPTX::BI__nvvm_atom_acquire_add_global_i:
21043  case NVPTX::BI__nvvm_atom_acquire_add_global_l:
21044  case NVPTX::BI__nvvm_atom_acquire_add_global_ll:
21045  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acquire, *this, E);
21046  case NVPTX::BI__nvvm_atom_acquire_add_global_f:
21047  case NVPTX::BI__nvvm_atom_acquire_add_global_d:
21048  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acquire, *this, E);
21049  case NVPTX::BI__nvvm_atom_acquire_xchg_global_i:
21050  case NVPTX::BI__nvvm_atom_acquire_xchg_global_l:
21051  case NVPTX::BI__nvvm_atom_acquire_xchg_global_ll:
21052  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acquire, *this, E);
21053  case NVPTX::BI__nvvm_atom_acquire_xchg_global_f:
21054  case NVPTX::BI__nvvm_atom_acquire_xchg_global_d:
21055  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acquire, *this, E);
21056  case NVPTX::BI__nvvm_atom_acquire_max_global_i:
21057  case NVPTX::BI__nvvm_atom_acquire_max_global_l:
21058  case NVPTX::BI__nvvm_atom_acquire_max_global_ll:
21059  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acquire, *this, E);
21060  case NVPTX::BI__nvvm_atom_acquire_max_global_ui:
21061  case NVPTX::BI__nvvm_atom_acquire_max_global_ul:
21062  case NVPTX::BI__nvvm_atom_acquire_max_global_ull:
21063  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acquire, *this, E);
21064  case NVPTX::BI__nvvm_atom_acquire_min_global_i:
21065  case NVPTX::BI__nvvm_atom_acquire_min_global_l:
21066  case NVPTX::BI__nvvm_atom_acquire_min_global_ll:
21067  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acquire, *this, E);
21068  case NVPTX::BI__nvvm_atom_acquire_min_global_ui:
21069  case NVPTX::BI__nvvm_atom_acquire_min_global_ul:
21070  case NVPTX::BI__nvvm_atom_acquire_min_global_ull:
21071  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acquire, *this, E);
21072  case NVPTX::BI__nvvm_atom_acquire_inc_global_ui:
21073  case NVPTX::BI__nvvm_atom_acquire_inc_global_ul:
21074  case NVPTX::BI__nvvm_atom_acquire_inc_global_ull:
21075  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acquire, *this, E);
21076  case NVPTX::BI__nvvm_atom_acquire_dec_global_ui:
21077  case NVPTX::BI__nvvm_atom_acquire_dec_global_ul:
21078  case NVPTX::BI__nvvm_atom_acquire_dec_global_ull:
21079  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acquire, *this, E);
21080  case NVPTX::BI__nvvm_atom_acquire_and_global_i:
21081  case NVPTX::BI__nvvm_atom_acquire_and_global_l:
21082  case NVPTX::BI__nvvm_atom_acquire_and_global_ll:
21083  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acquire, *this, E);
21084  case NVPTX::BI__nvvm_atom_acquire_or_global_i:
21085  case NVPTX::BI__nvvm_atom_acquire_or_global_l:
21086  case NVPTX::BI__nvvm_atom_acquire_or_global_ll:
21087  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acquire, *this, E);
21088  case NVPTX::BI__nvvm_atom_acquire_xor_global_i:
21089  case NVPTX::BI__nvvm_atom_acquire_xor_global_l:
21090  case NVPTX::BI__nvvm_atom_acquire_xor_global_ll:
21091  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acquire, *this, E);
21092  case NVPTX::BI__nvvm_atom_acquire_cas_global_i:
21093  case NVPTX::BI__nvvm_atom_acquire_cas_global_l:
21094  case NVPTX::BI__nvvm_atom_acquire_cas_global_ll:
21095  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acquire);
21096  case NVPTX::BI__nvvm_atom_acquire_cas_global_f:
21097  case NVPTX::BI__nvvm_atom_acquire_cas_global_d:
21098  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acquire);
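  // Acquire-ordered, scoped (cta/sys) atomics on the global address space.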
21099  case NVPTX::BI__nvvm_atom_acquire_cta_add_global_i:
21100  case NVPTX::BI__nvvm_atom_acquire_cta_add_global_l:
21101  case NVPTX::BI__nvvm_atom_acquire_cta_add_global_ll:
21102  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acquire_cta, *this, E);
21103  case NVPTX::BI__nvvm_atom_acquire_sys_add_global_i:
21104  case NVPTX::BI__nvvm_atom_acquire_sys_add_global_l:
21105  case NVPTX::BI__nvvm_atom_acquire_sys_add_global_ll:
21106  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acquire_sys, *this, E);
21107  case NVPTX::BI__nvvm_atom_acquire_cta_add_global_f:
21108  case NVPTX::BI__nvvm_atom_acquire_cta_add_global_d:
21109  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acquire_cta, *this, E);
21110  case NVPTX::BI__nvvm_atom_acquire_sys_add_global_f:
21111  case NVPTX::BI__nvvm_atom_acquire_sys_add_global_d:
21112  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acquire_sys, *this, E);
21113  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_global_i:
21114  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_global_l:
21115  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_global_ll:
21116  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acquire_cta, *this, E);
21117  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_global_f:
21118  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_global_d:
21119  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acquire_cta, *this, E);
21120  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_global_i:
21121  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_global_l:
21122  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_global_ll:
21123  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acquire_sys, *this, E);
21124  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_global_f:
21125  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_global_d:
21126  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acquire_sys, *this, E);
21127  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_i:
21128  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_l:
21129  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_ll:
21130  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acquire_cta, *this, E);
21131  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_ui:
21132  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_ul:
21133  case NVPTX::BI__nvvm_atom_acquire_cta_max_global_ull:
21134  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acquire_cta, *this, E);
21135  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_i:
21136  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_l:
21137  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_ll:
21138  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acquire_sys, *this, E);
21139  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_ui:
21140  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_ul:
21141  case NVPTX::BI__nvvm_atom_acquire_sys_max_global_ull:
21142  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acquire_sys, *this, E);
21143  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_i:
21144  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_l:
21145  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_ll:
21146  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acquire_cta, *this, E);
21147  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_ui:
21148  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_ul:
21149  case NVPTX::BI__nvvm_atom_acquire_cta_min_global_ull:
21150  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acquire_cta, *this, E);
21151  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_i:
21152  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_l:
21153  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_ll:
21154  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acquire_sys, *this, E);
21155  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_ui:
21156  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_ul:
21157  case NVPTX::BI__nvvm_atom_acquire_sys_min_global_ull:
21158  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acquire_sys, *this, E);
21159  case NVPTX::BI__nvvm_atom_acquire_cta_inc_global_ui:
21160  case NVPTX::BI__nvvm_atom_acquire_cta_inc_global_ul:
21161  case NVPTX::BI__nvvm_atom_acquire_cta_inc_global_ull:
21162  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acquire_cta, *this, E);
21163  case NVPTX::BI__nvvm_atom_acquire_cta_dec_global_ui:
21164  case NVPTX::BI__nvvm_atom_acquire_cta_dec_global_ul:
21165  case NVPTX::BI__nvvm_atom_acquire_cta_dec_global_ull:
21166  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acquire_cta, *this, E);
21167  case NVPTX::BI__nvvm_atom_acquire_sys_inc_global_ui:
21168  case NVPTX::BI__nvvm_atom_acquire_sys_inc_global_ul:
21169  case NVPTX::BI__nvvm_atom_acquire_sys_inc_global_ull:
21170  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acquire_sys, *this, E);
21171  case NVPTX::BI__nvvm_atom_acquire_sys_dec_global_ui:
21172  case NVPTX::BI__nvvm_atom_acquire_sys_dec_global_ul:
21173  case NVPTX::BI__nvvm_atom_acquire_sys_dec_global_ull:
21174  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acquire_sys, *this, E);
21175  case NVPTX::BI__nvvm_atom_acquire_cta_and_global_i:
21176  case NVPTX::BI__nvvm_atom_acquire_cta_and_global_l:
21177  case NVPTX::BI__nvvm_atom_acquire_cta_and_global_ll:
21178  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acquire_cta, *this, E);
21179  case NVPTX::BI__nvvm_atom_acquire_sys_and_global_i:
21180  case NVPTX::BI__nvvm_atom_acquire_sys_and_global_l:
21181  case NVPTX::BI__nvvm_atom_acquire_sys_and_global_ll:
21182  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acquire_sys, *this, E);
21183  case NVPTX::BI__nvvm_atom_acquire_cta_or_global_i:
21184  case NVPTX::BI__nvvm_atom_acquire_cta_or_global_l:
21185  case NVPTX::BI__nvvm_atom_acquire_cta_or_global_ll:
21186  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acquire_cta, *this, E);
21187  case NVPTX::BI__nvvm_atom_acquire_sys_or_global_i:
21188  case NVPTX::BI__nvvm_atom_acquire_sys_or_global_l:
21189  case NVPTX::BI__nvvm_atom_acquire_sys_or_global_ll:
21190  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acquire_sys, *this, E);
21191  case NVPTX::BI__nvvm_atom_acquire_cta_xor_global_i:
21192  case NVPTX::BI__nvvm_atom_acquire_cta_xor_global_l:
21193  case NVPTX::BI__nvvm_atom_acquire_cta_xor_global_ll:
21194  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acquire_cta, *this, E);
21195  case NVPTX::BI__nvvm_atom_acquire_sys_xor_global_i:
21196  case NVPTX::BI__nvvm_atom_acquire_sys_xor_global_l:
21197  case NVPTX::BI__nvvm_atom_acquire_sys_xor_global_ll:
21198  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acquire_sys, *this, E);
21199  case NVPTX::BI__nvvm_atom_acquire_cta_cas_global_i:
21200  case NVPTX::BI__nvvm_atom_acquire_cta_cas_global_l:
21201  case NVPTX::BI__nvvm_atom_acquire_cta_cas_global_ll:
21202  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acquire_cta);
21203  case NVPTX::BI__nvvm_atom_acquire_sys_cas_global_i:
21204  case NVPTX::BI__nvvm_atom_acquire_sys_cas_global_l:
21205  case NVPTX::BI__nvvm_atom_acquire_sys_cas_global_ll:
21206  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acquire_sys);
21207  case NVPTX::BI__nvvm_atom_acquire_cta_cas_global_f:
21208  case NVPTX::BI__nvvm_atom_acquire_cta_cas_global_d:
21209  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acquire_cta);
21210  case NVPTX::BI__nvvm_atom_acquire_sys_cas_global_f:
21211  case NVPTX::BI__nvvm_atom_acquire_sys_cas_global_d:
21212  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acquire_sys);
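  // Release-ordered atomics on the global address space.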
21213  case NVPTX::BI__nvvm_atom_release_add_global_i:
21214  case NVPTX::BI__nvvm_atom_release_add_global_l:
21215  case NVPTX::BI__nvvm_atom_release_add_global_ll:
21216  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_release, *this, E);
21217  case NVPTX::BI__nvvm_atom_release_add_global_f:
21218  case NVPTX::BI__nvvm_atom_release_add_global_d:
21219  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_release, *this, E);
21220  case NVPTX::BI__nvvm_atom_release_xchg_global_i:
21221  case NVPTX::BI__nvvm_atom_release_xchg_global_l:
21222  case NVPTX::BI__nvvm_atom_release_xchg_global_ll:
21223  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_release, *this, E);
21224  case NVPTX::BI__nvvm_atom_release_xchg_global_f:
21225  case NVPTX::BI__nvvm_atom_release_xchg_global_d:
21226  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_release, *this, E);
21227  case NVPTX::BI__nvvm_atom_release_max_global_i:
21228  case NVPTX::BI__nvvm_atom_release_max_global_l:
21229  case NVPTX::BI__nvvm_atom_release_max_global_ll:
21230  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_release, *this, E);
21231  case NVPTX::BI__nvvm_atom_release_max_global_ui:
21232  case NVPTX::BI__nvvm_atom_release_max_global_ul:
21233  case NVPTX::BI__nvvm_atom_release_max_global_ull:
21234  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_release, *this, E);
21235  case NVPTX::BI__nvvm_atom_release_min_global_i:
21236  case NVPTX::BI__nvvm_atom_release_min_global_l:
21237  case NVPTX::BI__nvvm_atom_release_min_global_ll:
21238  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_release, *this, E);
21239  case NVPTX::BI__nvvm_atom_release_min_global_ui:
21240  case NVPTX::BI__nvvm_atom_release_min_global_ul:
21241  case NVPTX::BI__nvvm_atom_release_min_global_ull:
21242  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_release, *this, E);
21243  case NVPTX::BI__nvvm_atom_release_inc_global_ui:
21244  case NVPTX::BI__nvvm_atom_release_inc_global_ul:
21245  case NVPTX::BI__nvvm_atom_release_inc_global_ull:
21246  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_release, *this, E);
21247  case NVPTX::BI__nvvm_atom_release_dec_global_ui:
21248  case NVPTX::BI__nvvm_atom_release_dec_global_ul:
21249  case NVPTX::BI__nvvm_atom_release_dec_global_ull:
21250  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_release, *this, E);
21251  case NVPTX::BI__nvvm_atom_release_and_global_i:
21252  case NVPTX::BI__nvvm_atom_release_and_global_l:
21253  case NVPTX::BI__nvvm_atom_release_and_global_ll:
21254  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_release, *this, E);
21255  case NVPTX::BI__nvvm_atom_release_or_global_i:
21256  case NVPTX::BI__nvvm_atom_release_or_global_l:
21257  case NVPTX::BI__nvvm_atom_release_or_global_ll:
21258  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_release, *this, E);
21259  case NVPTX::BI__nvvm_atom_release_xor_global_i:
21260  case NVPTX::BI__nvvm_atom_release_xor_global_l:
21261  case NVPTX::BI__nvvm_atom_release_xor_global_ll:
21262  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_release, *this, E);
21263  case NVPTX::BI__nvvm_atom_release_cas_global_i:
21264  case NVPTX::BI__nvvm_atom_release_cas_global_l:
21265  case NVPTX::BI__nvvm_atom_release_cas_global_ll:
21266  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_release);
21267  case NVPTX::BI__nvvm_atom_release_cas_global_f:
21268  case NVPTX::BI__nvvm_atom_release_cas_global_d:
21269  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_release);
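  // Release-ordered, scoped (cta/sys) atomics on the global address space.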
21270  case NVPTX::BI__nvvm_atom_release_cta_add_global_i:
21271  case NVPTX::BI__nvvm_atom_release_cta_add_global_l:
21272  case NVPTX::BI__nvvm_atom_release_cta_add_global_ll:
21273  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_release_cta, *this, E);
21274  case NVPTX::BI__nvvm_atom_release_sys_add_global_i:
21275  case NVPTX::BI__nvvm_atom_release_sys_add_global_l:
21276  case NVPTX::BI__nvvm_atom_release_sys_add_global_ll:
21277  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_release_sys, *this, E);
21278  case NVPTX::BI__nvvm_atom_release_cta_add_global_f:
21279  case NVPTX::BI__nvvm_atom_release_cta_add_global_d:
21280  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_release_cta, *this, E);
21281  case NVPTX::BI__nvvm_atom_release_sys_add_global_f:
21282  case NVPTX::BI__nvvm_atom_release_sys_add_global_d:
21283  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_release_sys, *this, E);
21284  case NVPTX::BI__nvvm_atom_release_cta_xchg_global_i:
21285  case NVPTX::BI__nvvm_atom_release_cta_xchg_global_l:
21286  case NVPTX::BI__nvvm_atom_release_cta_xchg_global_ll:
21287  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_release_cta, *this, E);
21288  case NVPTX::BI__nvvm_atom_release_cta_xchg_global_f:
21289  case NVPTX::BI__nvvm_atom_release_cta_xchg_global_d:
21290  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_release_cta, *this, E);
21291  case NVPTX::BI__nvvm_atom_release_sys_xchg_global_i:
21292  case NVPTX::BI__nvvm_atom_release_sys_xchg_global_l:
21293  case NVPTX::BI__nvvm_atom_release_sys_xchg_global_ll:
21294  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_release_sys, *this, E);
21295  case NVPTX::BI__nvvm_atom_release_sys_xchg_global_f:
21296  case NVPTX::BI__nvvm_atom_release_sys_xchg_global_d:
21297  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_release_sys, *this, E);
21298  case NVPTX::BI__nvvm_atom_release_cta_max_global_i:
21299  case NVPTX::BI__nvvm_atom_release_cta_max_global_l:
21300  case NVPTX::BI__nvvm_atom_release_cta_max_global_ll:
21301  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_release_cta, *this, E);
21302  case NVPTX::BI__nvvm_atom_release_cta_max_global_ui:
21303  case NVPTX::BI__nvvm_atom_release_cta_max_global_ul:
21304  case NVPTX::BI__nvvm_atom_release_cta_max_global_ull:
21305  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_release_cta, *this, E);
21306  case NVPTX::BI__nvvm_atom_release_sys_max_global_i:
21307  case NVPTX::BI__nvvm_atom_release_sys_max_global_l:
21308  case NVPTX::BI__nvvm_atom_release_sys_max_global_ll:
21309  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_release_sys, *this, E);
21310  case NVPTX::BI__nvvm_atom_release_sys_max_global_ui:
21311  case NVPTX::BI__nvvm_atom_release_sys_max_global_ul:
21312  case NVPTX::BI__nvvm_atom_release_sys_max_global_ull:
21313  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_release_sys, *this, E);
21314  case NVPTX::BI__nvvm_atom_release_cta_min_global_i:
21315  case NVPTX::BI__nvvm_atom_release_cta_min_global_l:
21316  case NVPTX::BI__nvvm_atom_release_cta_min_global_ll:
21317  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_release_cta, *this, E);
21318  case NVPTX::BI__nvvm_atom_release_cta_min_global_ui:
21319  case NVPTX::BI__nvvm_atom_release_cta_min_global_ul:
21320  case NVPTX::BI__nvvm_atom_release_cta_min_global_ull:
21321  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_release_cta, *this, E);
21322  case NVPTX::BI__nvvm_atom_release_sys_min_global_i:
21323  case NVPTX::BI__nvvm_atom_release_sys_min_global_l:
21324  case NVPTX::BI__nvvm_atom_release_sys_min_global_ll:
21325  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_release_sys, *this, E);
21326  case NVPTX::BI__nvvm_atom_release_sys_min_global_ui:
21327  case NVPTX::BI__nvvm_atom_release_sys_min_global_ul:
21328  case NVPTX::BI__nvvm_atom_release_sys_min_global_ull:
21329  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_release_sys, *this, E);
21330  case NVPTX::BI__nvvm_atom_release_cta_inc_global_ui:
21331  case NVPTX::BI__nvvm_atom_release_cta_inc_global_ul:
21332  case NVPTX::BI__nvvm_atom_release_cta_inc_global_ull:
21333  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_release_cta, *this, E);
21334  case NVPTX::BI__nvvm_atom_release_cta_dec_global_ui:
21335  case NVPTX::BI__nvvm_atom_release_cta_dec_global_ul:
21336  case NVPTX::BI__nvvm_atom_release_cta_dec_global_ull:
21337  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_release_cta, *this, E);
21338  case NVPTX::BI__nvvm_atom_release_sys_inc_global_ui:
21339  case NVPTX::BI__nvvm_atom_release_sys_inc_global_ul:
21340  case NVPTX::BI__nvvm_atom_release_sys_inc_global_ull:
21341  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_release_sys, *this, E);
21342  case NVPTX::BI__nvvm_atom_release_sys_dec_global_ui:
21343  case NVPTX::BI__nvvm_atom_release_sys_dec_global_ul:
21344  case NVPTX::BI__nvvm_atom_release_sys_dec_global_ull:
21345  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_release_sys, *this, E);
21346  case NVPTX::BI__nvvm_atom_release_cta_and_global_i:
21347  case NVPTX::BI__nvvm_atom_release_cta_and_global_l:
21348  case NVPTX::BI__nvvm_atom_release_cta_and_global_ll:
21349  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_release_cta, *this, E);
21350  case NVPTX::BI__nvvm_atom_release_sys_and_global_i:
21351  case NVPTX::BI__nvvm_atom_release_sys_and_global_l:
21352  case NVPTX::BI__nvvm_atom_release_sys_and_global_ll:
21353  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_release_sys, *this, E);
21354  case NVPTX::BI__nvvm_atom_release_cta_or_global_i:
21355  case NVPTX::BI__nvvm_atom_release_cta_or_global_l:
21356  case NVPTX::BI__nvvm_atom_release_cta_or_global_ll:
21357  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_release_cta, *this, E);
21358  case NVPTX::BI__nvvm_atom_release_sys_or_global_i:
21359  case NVPTX::BI__nvvm_atom_release_sys_or_global_l:
21360  case NVPTX::BI__nvvm_atom_release_sys_or_global_ll:
21361  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_release_sys, *this, E);
21362  case NVPTX::BI__nvvm_atom_release_cta_xor_global_i:
21363  case NVPTX::BI__nvvm_atom_release_cta_xor_global_l:
21364  case NVPTX::BI__nvvm_atom_release_cta_xor_global_ll:
21365  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_release_cta, *this, E);
21366  case NVPTX::BI__nvvm_atom_release_sys_xor_global_i:
21367  case NVPTX::BI__nvvm_atom_release_sys_xor_global_l:
21368  case NVPTX::BI__nvvm_atom_release_sys_xor_global_ll:
21369  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_release_sys, *this, E);
21370  case NVPTX::BI__nvvm_atom_release_cta_cas_global_i:
21371  case NVPTX::BI__nvvm_atom_release_cta_cas_global_l:
21372  case NVPTX::BI__nvvm_atom_release_cta_cas_global_ll:
21373  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_release_cta);
21374  case NVPTX::BI__nvvm_atom_release_sys_cas_global_i:
21375  case NVPTX::BI__nvvm_atom_release_sys_cas_global_l:
21376  case NVPTX::BI__nvvm_atom_release_sys_cas_global_ll:
21377  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_release_sys);
21378  case NVPTX::BI__nvvm_atom_release_cta_cas_global_f:
21379  case NVPTX::BI__nvvm_atom_release_cta_cas_global_d:
21380  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_release_cta);
21381  case NVPTX::BI__nvvm_atom_release_sys_cas_global_f:
21382  case NVPTX::BI__nvvm_atom_release_sys_cas_global_d:
21383  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_release_sys);
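  // Acq_rel-ordered atomics on the global address space.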
21384  case NVPTX::BI__nvvm_atom_acq_rel_add_global_i:
21385  case NVPTX::BI__nvvm_atom_acq_rel_add_global_l:
21386  case NVPTX::BI__nvvm_atom_acq_rel_add_global_ll:
21387  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acq_rel, *this, E);
21388  case NVPTX::BI__nvvm_atom_acq_rel_add_global_f:
21389  case NVPTX::BI__nvvm_atom_acq_rel_add_global_d:
21390  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acq_rel, *this, E);
21391  case NVPTX::BI__nvvm_atom_acq_rel_xchg_global_i:
21392  case NVPTX::BI__nvvm_atom_acq_rel_xchg_global_l:
21393  case NVPTX::BI__nvvm_atom_acq_rel_xchg_global_ll:
21394  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acq_rel, *this, E);
21395  case NVPTX::BI__nvvm_atom_acq_rel_xchg_global_f:
21396  case NVPTX::BI__nvvm_atom_acq_rel_xchg_global_d:
21397  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acq_rel, *this, E);
21398  case NVPTX::BI__nvvm_atom_acq_rel_max_global_i:
21399  case NVPTX::BI__nvvm_atom_acq_rel_max_global_l:
21400  case NVPTX::BI__nvvm_atom_acq_rel_max_global_ll:
21401  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acq_rel, *this, E);
21402  case NVPTX::BI__nvvm_atom_acq_rel_max_global_ui:
21403  case NVPTX::BI__nvvm_atom_acq_rel_max_global_ul:
21404  case NVPTX::BI__nvvm_atom_acq_rel_max_global_ull:
21405  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acq_rel, *this, E);
21406  case NVPTX::BI__nvvm_atom_acq_rel_min_global_i:
21407  case NVPTX::BI__nvvm_atom_acq_rel_min_global_l:
21408  case NVPTX::BI__nvvm_atom_acq_rel_min_global_ll:
21409  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acq_rel, *this, E);
21410  case NVPTX::BI__nvvm_atom_acq_rel_min_global_ui:
21411  case NVPTX::BI__nvvm_atom_acq_rel_min_global_ul:
21412  case NVPTX::BI__nvvm_atom_acq_rel_min_global_ull:
21413  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acq_rel, *this, E);
21414  case NVPTX::BI__nvvm_atom_acq_rel_inc_global_ui:
21415  case NVPTX::BI__nvvm_atom_acq_rel_inc_global_ul:
21416  case NVPTX::BI__nvvm_atom_acq_rel_inc_global_ull:
21417  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acq_rel, *this, E);
21418  case NVPTX::BI__nvvm_atom_acq_rel_dec_global_ui:
21419  case NVPTX::BI__nvvm_atom_acq_rel_dec_global_ul:
21420  case NVPTX::BI__nvvm_atom_acq_rel_dec_global_ull:
21421  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acq_rel, *this, E);
21422  case NVPTX::BI__nvvm_atom_acq_rel_and_global_i:
21423  case NVPTX::BI__nvvm_atom_acq_rel_and_global_l:
21424  case NVPTX::BI__nvvm_atom_acq_rel_and_global_ll:
21425  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acq_rel, *this, E);
21426  case NVPTX::BI__nvvm_atom_acq_rel_or_global_i:
21427  case NVPTX::BI__nvvm_atom_acq_rel_or_global_l:
21428  case NVPTX::BI__nvvm_atom_acq_rel_or_global_ll:
21429  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acq_rel, *this, E);
21430  case NVPTX::BI__nvvm_atom_acq_rel_xor_global_i:
21431  case NVPTX::BI__nvvm_atom_acq_rel_xor_global_l:
21432  case NVPTX::BI__nvvm_atom_acq_rel_xor_global_ll:
21433  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acq_rel, *this, E);
21434  case NVPTX::BI__nvvm_atom_acq_rel_cas_global_i:
21435  case NVPTX::BI__nvvm_atom_acq_rel_cas_global_l:
21436  case NVPTX::BI__nvvm_atom_acq_rel_cas_global_ll:
21437  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acq_rel);
21438  case NVPTX::BI__nvvm_atom_acq_rel_cas_global_f:
21439  case NVPTX::BI__nvvm_atom_acq_rel_cas_global_d:
21440  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acq_rel);
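  // Acq_rel-ordered, scoped (cta/sys) atomics on the global address space.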
21441  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_global_i:
21442  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_global_l:
21443  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_global_ll:
21444  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acq_rel_cta, *this, E);
21445  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_global_i:
21446  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_global_l:
21447  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_global_ll:
21448  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_i_acq_rel_sys, *this, E);
21449  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_global_f:
21450  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_global_d:
21451  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acq_rel_cta, *this, E);
21452  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_global_f:
21453  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_global_d:
21454  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_global_f_acq_rel_sys, *this, E);
21455  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_global_i:
21456  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_global_l:
21457  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_global_ll:
21458  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acq_rel_cta, *this, E);
21459  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_global_f:
21460  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_global_d:
21461  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acq_rel_cta, *this, E);
21462  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_global_i:
21463  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_global_l:
21464  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_global_ll:
21465  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_i_acq_rel_sys, *this, E);
21466  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_global_f:
21467  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_global_d:
21468  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_global_f_acq_rel_sys, *this, E);
21469  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_i:
21470  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_l:
21471  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_ll:
21472  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acq_rel_cta, *this, E);
21473  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_ui:
21474  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_ul:
21475  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_global_ull:
21476  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acq_rel_cta, *this, E);
21477  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_i:
21478  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_l:
21479  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_ll:
21480  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_i_acq_rel_sys, *this, E);
21481  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_ui:
21482  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_ul:
21483  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_global_ull:
21484  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_global_ui_acq_rel_sys, *this, E);
21485  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_i:
21486  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_l:
21487  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_ll:
21488  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acq_rel_cta, *this, E);
21489  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_ui:
21490  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_ul:
21491  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_global_ull:
21492  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acq_rel_cta, *this, E);
21493  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_i:
21494  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_l:
21495  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_ll:
21496  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_i_acq_rel_sys, *this, E);
21497  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_ui:
21498  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_ul:
21499  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_global_ull:
21500  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_global_ui_acq_rel_sys, *this, E);
21501  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_global_ui:
21502  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_global_ul:
21503  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_global_ull:
21504  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acq_rel_cta, *this, E);
21505  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_global_ui:
21506  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_global_ul:
21507  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_global_ull:
21508  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acq_rel_cta, *this, E);
21509  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_global_ui:
21510  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_global_ul:
21511  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_global_ull:
21512  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_global_i_acq_rel_sys, *this, E);
21513  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_global_ui:
21514  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_global_ul:
21515  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_global_ull:
21516  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_global_i_acq_rel_sys, *this, E);
21517  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_global_i:
21518  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_global_l:
21519  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_global_ll:
21520  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acq_rel_cta, *this, E);
21521  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_global_i:
21522  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_global_l:
21523  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_global_ll:
21524  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_global_i_acq_rel_sys, *this, E);
21525  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_global_i:
21526  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_global_l:
21527  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_global_ll:
21528  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acq_rel_cta, *this, E);
21529  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_global_i:
21530  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_global_l:
21531  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_global_ll:
21532  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_global_i_acq_rel_sys, *this, E);
21533  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_global_i:
21534  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_global_l:
21535  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_global_ll:
21536  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acq_rel_cta, *this, E);
21537  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_global_i:
21538  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_global_l:
21539  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_global_ll:
21540  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_global_i_acq_rel_sys, *this, E);
21541  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_global_i:
21542  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_global_l:
21543  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_global_ll:
21544  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acq_rel_cta);
21545  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_global_i:
21546  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_global_l:
21547  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_global_ll:
21548  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_i_acq_rel_sys);
21549  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_global_f:
21550  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_global_d:
21551  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acq_rel_cta);
21552  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_global_f:
21553  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_global_d:
21554  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_global_f_acq_rel_sys);
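  // Atomics on the shared address space.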
21555  case NVPTX::BI__nvvm_atom_add_shared_i:
21556  case NVPTX::BI__nvvm_atom_add_shared_l:
21557  case NVPTX::BI__nvvm_atom_add_shared_ll:
21558  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i, *this, E);
21559  case NVPTX::BI__nvvm_atom_add_shared_f:
21560  case NVPTX::BI__nvvm_atom_add_shared_d:
21561  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f, *this, E);
21562  case NVPTX::BI__nvvm_atom_xchg_shared_i:
21563  case NVPTX::BI__nvvm_atom_xchg_shared_l:
21564  case NVPTX::BI__nvvm_atom_xchg_shared_ll:
21565  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i, *this, E);
21566  case NVPTX::BI__nvvm_atom_xchg_shared_f:
21567  case NVPTX::BI__nvvm_atom_xchg_shared_d:
21568  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f, *this, E);
21569  case NVPTX::BI__nvvm_atom_max_shared_i:
21570  case NVPTX::BI__nvvm_atom_max_shared_l:
21571  case NVPTX::BI__nvvm_atom_max_shared_ll:
21572  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i, *this, E);
21573  case NVPTX::BI__nvvm_atom_max_shared_ui:
21574  case NVPTX::BI__nvvm_atom_max_shared_ul:
21575  case NVPTX::BI__nvvm_atom_max_shared_ull:
21576  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui, *this, E);
21577  case NVPTX::BI__nvvm_atom_min_shared_i:
21578  case NVPTX::BI__nvvm_atom_min_shared_l:
21579  case NVPTX::BI__nvvm_atom_min_shared_ll:
21580  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i, *this, E);
21581  case NVPTX::BI__nvvm_atom_min_shared_ui:
21582  case NVPTX::BI__nvvm_atom_min_shared_ul:
21583  case NVPTX::BI__nvvm_atom_min_shared_ull:
21584  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui, *this, E);
21585  case NVPTX::BI__nvvm_atom_inc_shared_ui:
21586  case NVPTX::BI__nvvm_atom_inc_shared_ul:
21587  case NVPTX::BI__nvvm_atom_inc_shared_ull:
21588  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i, *this, E);
21589  case NVPTX::BI__nvvm_atom_dec_shared_ui:
21590  case NVPTX::BI__nvvm_atom_dec_shared_ul:
21591  case NVPTX::BI__nvvm_atom_dec_shared_ull:
21592  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i, *this, E);
21593  case NVPTX::BI__nvvm_atom_and_shared_i:
21594  case NVPTX::BI__nvvm_atom_and_shared_l:
21595  case NVPTX::BI__nvvm_atom_and_shared_ll:
21596  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i, *this, E);
21597  case NVPTX::BI__nvvm_atom_or_shared_i:
21598  case NVPTX::BI__nvvm_atom_or_shared_l:
21599  case NVPTX::BI__nvvm_atom_or_shared_ll:
21600  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i, *this, E);
21601  case NVPTX::BI__nvvm_atom_xor_shared_i:
21602  case NVPTX::BI__nvvm_atom_xor_shared_l:
21603  case NVPTX::BI__nvvm_atom_xor_shared_ll:
21604  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i, *this, E);
21605  case NVPTX::BI__nvvm_atom_cas_shared_i:
21606  case NVPTX::BI__nvvm_atom_cas_shared_l:
21607  case NVPTX::BI__nvvm_atom_cas_shared_ll:
21608  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i);
21609  case NVPTX::BI__nvvm_atom_cas_shared_f:
21610  case NVPTX::BI__nvvm_atom_cas_shared_d:
21611  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f);
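  // Scoped (cta/sys) atomics on the shared address space.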
21612  case NVPTX::BI__nvvm_atom_cta_add_shared_i:
21613  case NVPTX::BI__nvvm_atom_cta_add_shared_l:
21614  case NVPTX::BI__nvvm_atom_cta_add_shared_ll:
21615  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_cta, *this, E);
21616  case NVPTX::BI__nvvm_atom_sys_add_shared_i:
21617  case NVPTX::BI__nvvm_atom_sys_add_shared_l:
21618  case NVPTX::BI__nvvm_atom_sys_add_shared_ll:
21619  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_sys, *this, E);
21620  case NVPTX::BI__nvvm_atom_cta_add_shared_f:
21621  case NVPTX::BI__nvvm_atom_cta_add_shared_d:
21622  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_cta, *this, E);
21623  case NVPTX::BI__nvvm_atom_sys_add_shared_f:
21624  case NVPTX::BI__nvvm_atom_sys_add_shared_d:
21625  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_sys, *this, E);
21626  case NVPTX::BI__nvvm_atom_cta_xchg_shared_i:
21627  case NVPTX::BI__nvvm_atom_cta_xchg_shared_l:
21628  case NVPTX::BI__nvvm_atom_cta_xchg_shared_ll:
21629  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_cta, *this, E);
21630  case NVPTX::BI__nvvm_atom_cta_xchg_shared_f:
21631  case NVPTX::BI__nvvm_atom_cta_xchg_shared_d:
21632  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_cta, *this, E);
21633  case NVPTX::BI__nvvm_atom_sys_xchg_shared_i:
21634  case NVPTX::BI__nvvm_atom_sys_xchg_shared_l:
21635  case NVPTX::BI__nvvm_atom_sys_xchg_shared_ll:
21636  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_sys, *this, E);
21637  case NVPTX::BI__nvvm_atom_sys_xchg_shared_f:
21638  case NVPTX::BI__nvvm_atom_sys_xchg_shared_d:
21639  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_sys, *this, E);
21640  case NVPTX::BI__nvvm_atom_cta_max_shared_i:
21641  case NVPTX::BI__nvvm_atom_cta_max_shared_l:
21642  case NVPTX::BI__nvvm_atom_cta_max_shared_ll:
21643  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_cta, *this, E);
21644  case NVPTX::BI__nvvm_atom_cta_max_shared_ui:
21645  case NVPTX::BI__nvvm_atom_cta_max_shared_ul:
21646  case NVPTX::BI__nvvm_atom_cta_max_shared_ull:
21647  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_cta, *this, E);
21648  case NVPTX::BI__nvvm_atom_sys_max_shared_i:
21649  case NVPTX::BI__nvvm_atom_sys_max_shared_l:
21650  case NVPTX::BI__nvvm_atom_sys_max_shared_ll:
21651  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_sys, *this, E);
21652  case NVPTX::BI__nvvm_atom_sys_max_shared_ui:
21653  case NVPTX::BI__nvvm_atom_sys_max_shared_ul:
21654  case NVPTX::BI__nvvm_atom_sys_max_shared_ull:
21655  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_sys, *this, E);
21656  case NVPTX::BI__nvvm_atom_cta_min_shared_i:
21657  case NVPTX::BI__nvvm_atom_cta_min_shared_l:
21658  case NVPTX::BI__nvvm_atom_cta_min_shared_ll:
21659  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_cta, *this, E);
21660  case NVPTX::BI__nvvm_atom_cta_min_shared_ui:
21661  case NVPTX::BI__nvvm_atom_cta_min_shared_ul:
21662  case NVPTX::BI__nvvm_atom_cta_min_shared_ull:
21663  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_cta, *this, E);
21664  case NVPTX::BI__nvvm_atom_sys_min_shared_i:
21665  case NVPTX::BI__nvvm_atom_sys_min_shared_l:
21666  case NVPTX::BI__nvvm_atom_sys_min_shared_ll:
21667  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_sys, *this, E);
21668  case NVPTX::BI__nvvm_atom_sys_min_shared_ui:
21669  case NVPTX::BI__nvvm_atom_sys_min_shared_ul:
21670  case NVPTX::BI__nvvm_atom_sys_min_shared_ull:
21671  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_sys, *this, E);
21672  case NVPTX::BI__nvvm_atom_cta_inc_shared_ui:
21673  case NVPTX::BI__nvvm_atom_cta_inc_shared_ul:
21674  case NVPTX::BI__nvvm_atom_cta_inc_shared_ull:
21675  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_cta, *this, E);
21676  case NVPTX::BI__nvvm_atom_cta_dec_shared_ui:
21677  case NVPTX::BI__nvvm_atom_cta_dec_shared_ul:
21678  case NVPTX::BI__nvvm_atom_cta_dec_shared_ull:
21679  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_cta, *this, E);
21680  case NVPTX::BI__nvvm_atom_sys_inc_shared_ui:
21681  case NVPTX::BI__nvvm_atom_sys_inc_shared_ul:
21682  case NVPTX::BI__nvvm_atom_sys_inc_shared_ull:
21683  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_sys, *this, E);
21684  case NVPTX::BI__nvvm_atom_sys_dec_shared_ui:
21685  case NVPTX::BI__nvvm_atom_sys_dec_shared_ul:
21686  case NVPTX::BI__nvvm_atom_sys_dec_shared_ull:
21687  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_sys, *this, E);
21688  case NVPTX::BI__nvvm_atom_cta_and_shared_i:
21689  case NVPTX::BI__nvvm_atom_cta_and_shared_l:
21690  case NVPTX::BI__nvvm_atom_cta_and_shared_ll:
21691  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_cta, *this, E);
21692  case NVPTX::BI__nvvm_atom_sys_and_shared_i:
21693  case NVPTX::BI__nvvm_atom_sys_and_shared_l:
21694  case NVPTX::BI__nvvm_atom_sys_and_shared_ll:
21695  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_sys, *this, E);
21696  case NVPTX::BI__nvvm_atom_cta_or_shared_i:
21697  case NVPTX::BI__nvvm_atom_cta_or_shared_l:
21698  case NVPTX::BI__nvvm_atom_cta_or_shared_ll:
21699  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_cta, *this, E);
21700  case NVPTX::BI__nvvm_atom_sys_or_shared_i:
21701  case NVPTX::BI__nvvm_atom_sys_or_shared_l:
21702  case NVPTX::BI__nvvm_atom_sys_or_shared_ll:
21703  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_sys, *this, E);
21704  case NVPTX::BI__nvvm_atom_cta_xor_shared_i:
21705  case NVPTX::BI__nvvm_atom_cta_xor_shared_l:
21706  case NVPTX::BI__nvvm_atom_cta_xor_shared_ll:
21707  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_cta, *this, E);
21708  case NVPTX::BI__nvvm_atom_sys_xor_shared_i:
21709  case NVPTX::BI__nvvm_atom_sys_xor_shared_l:
21710  case NVPTX::BI__nvvm_atom_sys_xor_shared_ll:
21711  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_sys, *this, E);
21712  case NVPTX::BI__nvvm_atom_cta_cas_shared_i:
21713  case NVPTX::BI__nvvm_atom_cta_cas_shared_l:
21714  case NVPTX::BI__nvvm_atom_cta_cas_shared_ll:
21715  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_cta);
21716  case NVPTX::BI__nvvm_atom_sys_cas_shared_i:
21717  case NVPTX::BI__nvvm_atom_sys_cas_shared_l:
21718  case NVPTX::BI__nvvm_atom_sys_cas_shared_ll:
21719  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_sys);
21720  case NVPTX::BI__nvvm_atom_cta_cas_shared_f:
21721  case NVPTX::BI__nvvm_atom_cta_cas_shared_d:
21722  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_cta);
21723  case NVPTX::BI__nvvm_atom_sys_cas_shared_f:
21724  case NVPTX::BI__nvvm_atom_sys_cas_shared_d:
21725  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_sys);
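  // Acquire-ordered atomics on the shared address space.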
21726  case NVPTX::BI__nvvm_atom_acquire_add_shared_i:
21727  case NVPTX::BI__nvvm_atom_acquire_add_shared_l:
21728  case NVPTX::BI__nvvm_atom_acquire_add_shared_ll:
21729  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acquire, *this, E);
21730  case NVPTX::BI__nvvm_atom_acquire_add_shared_f:
21731  case NVPTX::BI__nvvm_atom_acquire_add_shared_d:
21732  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acquire, *this, E);
21733  case NVPTX::BI__nvvm_atom_acquire_xchg_shared_i:
21734  case NVPTX::BI__nvvm_atom_acquire_xchg_shared_l:
21735  case NVPTX::BI__nvvm_atom_acquire_xchg_shared_ll:
21736  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acquire, *this, E);
21737  case NVPTX::BI__nvvm_atom_acquire_xchg_shared_f:
21738  case NVPTX::BI__nvvm_atom_acquire_xchg_shared_d:
21739  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acquire, *this, E);
21740  case NVPTX::BI__nvvm_atom_acquire_max_shared_i:
21741  case NVPTX::BI__nvvm_atom_acquire_max_shared_l:
21742  case NVPTX::BI__nvvm_atom_acquire_max_shared_ll:
21743  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acquire, *this, E);
21744  case NVPTX::BI__nvvm_atom_acquire_max_shared_ui:
21745  case NVPTX::BI__nvvm_atom_acquire_max_shared_ul:
21746  case NVPTX::BI__nvvm_atom_acquire_max_shared_ull:
21747  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acquire, *this, E);
21748  case NVPTX::BI__nvvm_atom_acquire_min_shared_i:
21749  case NVPTX::BI__nvvm_atom_acquire_min_shared_l:
21750  case NVPTX::BI__nvvm_atom_acquire_min_shared_ll:
21751  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acquire, *this, E);
21752  case NVPTX::BI__nvvm_atom_acquire_min_shared_ui:
21753  case NVPTX::BI__nvvm_atom_acquire_min_shared_ul:
21754  case NVPTX::BI__nvvm_atom_acquire_min_shared_ull:
21755  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acquire, *this, E);
21756  case NVPTX::BI__nvvm_atom_acquire_inc_shared_ui:
21757  case NVPTX::BI__nvvm_atom_acquire_inc_shared_ul:
21758  case NVPTX::BI__nvvm_atom_acquire_inc_shared_ull:
21759  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acquire, *this, E);
21760  case NVPTX::BI__nvvm_atom_acquire_dec_shared_ui:
21761  case NVPTX::BI__nvvm_atom_acquire_dec_shared_ul:
21762  case NVPTX::BI__nvvm_atom_acquire_dec_shared_ull:
21763  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acquire, *this, E);
21764  case NVPTX::BI__nvvm_atom_acquire_and_shared_i:
21765  case NVPTX::BI__nvvm_atom_acquire_and_shared_l:
21766  case NVPTX::BI__nvvm_atom_acquire_and_shared_ll:
21767  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acquire, *this, E);
21768  case NVPTX::BI__nvvm_atom_acquire_or_shared_i:
21769  case NVPTX::BI__nvvm_atom_acquire_or_shared_l:
21770  case NVPTX::BI__nvvm_atom_acquire_or_shared_ll:
21771  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acquire, *this, E);
21772  case NVPTX::BI__nvvm_atom_acquire_xor_shared_i:
21773  case NVPTX::BI__nvvm_atom_acquire_xor_shared_l:
21774  case NVPTX::BI__nvvm_atom_acquire_xor_shared_ll:
21775  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acquire, *this, E);
21776  case NVPTX::BI__nvvm_atom_acquire_cas_shared_i:
21777  case NVPTX::BI__nvvm_atom_acquire_cas_shared_l:
21778  case NVPTX::BI__nvvm_atom_acquire_cas_shared_ll:
21779  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acquire);
21780  case NVPTX::BI__nvvm_atom_acquire_cas_shared_f:
21781  case NVPTX::BI__nvvm_atom_acquire_cas_shared_d:
21782  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acquire);
21783  case NVPTX::BI__nvvm_atom_acquire_cta_add_shared_i:
21784  case NVPTX::BI__nvvm_atom_acquire_cta_add_shared_l:
21785  case NVPTX::BI__nvvm_atom_acquire_cta_add_shared_ll:
21786  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acquire_cta, *this, E);
21787  case NVPTX::BI__nvvm_atom_acquire_sys_add_shared_i:
21788  case NVPTX::BI__nvvm_atom_acquire_sys_add_shared_l:
21789  case NVPTX::BI__nvvm_atom_acquire_sys_add_shared_ll:
21790  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acquire_sys, *this, E);
21791  case NVPTX::BI__nvvm_atom_acquire_cta_add_shared_f:
21792  case NVPTX::BI__nvvm_atom_acquire_cta_add_shared_d:
21793  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acquire_cta, *this, E);
21794  case NVPTX::BI__nvvm_atom_acquire_sys_add_shared_f:
21795  case NVPTX::BI__nvvm_atom_acquire_sys_add_shared_d:
21796  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acquire_sys, *this, E);
21797  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_shared_i:
21798  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_shared_l:
21799  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_shared_ll:
21800  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acquire_cta, *this, E);
21801  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_shared_f:
21802  case NVPTX::BI__nvvm_atom_acquire_cta_xchg_shared_d:
21803  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acquire_cta, *this, E);
21804  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_shared_i:
21805  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_shared_l:
21806  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_shared_ll:
21807  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acquire_sys, *this, E);
21808  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_shared_f:
21809  case NVPTX::BI__nvvm_atom_acquire_sys_xchg_shared_d:
21810  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acquire_sys, *this, E);
21811  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_i:
21812  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_l:
21813  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_ll:
21814  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acquire_cta, *this, E);
21815  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_ui:
21816  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_ul:
21817  case NVPTX::BI__nvvm_atom_acquire_cta_max_shared_ull:
21818  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acquire_cta, *this, E);
21819  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_i:
21820  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_l:
21821  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_ll:
21822  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acquire_sys, *this, E);
21823  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_ui:
21824  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_ul:
21825  case NVPTX::BI__nvvm_atom_acquire_sys_max_shared_ull:
21826  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acquire_sys, *this, E);
21827  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_i:
21828  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_l:
21829  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_ll:
21830  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acquire_cta, *this, E);
21831  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_ui:
21832  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_ul:
21833  case NVPTX::BI__nvvm_atom_acquire_cta_min_shared_ull:
21834  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acquire_cta, *this, E);
21835  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_i:
21836  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_l:
21837  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_ll:
21838  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acquire_sys, *this, E);
21839  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_ui:
21840  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_ul:
21841  case NVPTX::BI__nvvm_atom_acquire_sys_min_shared_ull:
21842  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acquire_sys, *this, E);
21843  case NVPTX::BI__nvvm_atom_acquire_cta_inc_shared_ui:
21844  case NVPTX::BI__nvvm_atom_acquire_cta_inc_shared_ul:
21845  case NVPTX::BI__nvvm_atom_acquire_cta_inc_shared_ull:
21846  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acquire_cta, *this, E);
21847  case NVPTX::BI__nvvm_atom_acquire_cta_dec_shared_ui:
21848  case NVPTX::BI__nvvm_atom_acquire_cta_dec_shared_ul:
21849  case NVPTX::BI__nvvm_atom_acquire_cta_dec_shared_ull:
21850  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acquire_cta, *this, E);
21851  case NVPTX::BI__nvvm_atom_acquire_sys_inc_shared_ui:
21852  case NVPTX::BI__nvvm_atom_acquire_sys_inc_shared_ul:
21853  case NVPTX::BI__nvvm_atom_acquire_sys_inc_shared_ull:
21854  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acquire_sys, *this, E);
21855  case NVPTX::BI__nvvm_atom_acquire_sys_dec_shared_ui:
21856  case NVPTX::BI__nvvm_atom_acquire_sys_dec_shared_ul:
21857  case NVPTX::BI__nvvm_atom_acquire_sys_dec_shared_ull:
21858  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acquire_sys, *this, E);
21859  case NVPTX::BI__nvvm_atom_acquire_cta_and_shared_i:
21860  case NVPTX::BI__nvvm_atom_acquire_cta_and_shared_l:
21861  case NVPTX::BI__nvvm_atom_acquire_cta_and_shared_ll:
21862  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acquire_cta, *this, E);
21863  case NVPTX::BI__nvvm_atom_acquire_sys_and_shared_i:
21864  case NVPTX::BI__nvvm_atom_acquire_sys_and_shared_l:
21865  case NVPTX::BI__nvvm_atom_acquire_sys_and_shared_ll:
21866  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acquire_sys, *this, E);
21867  case NVPTX::BI__nvvm_atom_acquire_cta_or_shared_i:
21868  case NVPTX::BI__nvvm_atom_acquire_cta_or_shared_l:
21869  case NVPTX::BI__nvvm_atom_acquire_cta_or_shared_ll:
21870  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acquire_cta, *this, E);
21871  case NVPTX::BI__nvvm_atom_acquire_sys_or_shared_i:
21872  case NVPTX::BI__nvvm_atom_acquire_sys_or_shared_l:
21873  case NVPTX::BI__nvvm_atom_acquire_sys_or_shared_ll:
21874  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acquire_sys, *this, E);
21875  case NVPTX::BI__nvvm_atom_acquire_cta_xor_shared_i:
21876  case NVPTX::BI__nvvm_atom_acquire_cta_xor_shared_l:
21877  case NVPTX::BI__nvvm_atom_acquire_cta_xor_shared_ll:
21878  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acquire_cta, *this, E);
21879  case NVPTX::BI__nvvm_atom_acquire_sys_xor_shared_i:
21880  case NVPTX::BI__nvvm_atom_acquire_sys_xor_shared_l:
21881  case NVPTX::BI__nvvm_atom_acquire_sys_xor_shared_ll:
21882  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acquire_sys, *this, E);
21883  case NVPTX::BI__nvvm_atom_acquire_cta_cas_shared_i:
21884  case NVPTX::BI__nvvm_atom_acquire_cta_cas_shared_l:
21885  case NVPTX::BI__nvvm_atom_acquire_cta_cas_shared_ll:
21886  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acquire_cta);
21887  case NVPTX::BI__nvvm_atom_acquire_sys_cas_shared_i:
21888  case NVPTX::BI__nvvm_atom_acquire_sys_cas_shared_l:
21889  case NVPTX::BI__nvvm_atom_acquire_sys_cas_shared_ll:
21890  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acquire_sys);
21891  case NVPTX::BI__nvvm_atom_acquire_cta_cas_shared_f:
21892  case NVPTX::BI__nvvm_atom_acquire_cta_cas_shared_d:
21893  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acquire_cta);
21894  case NVPTX::BI__nvvm_atom_acquire_sys_cas_shared_f:
21895  case NVPTX::BI__nvvm_atom_acquire_sys_cas_shared_d:
21896  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acquire_sys);
21897  case NVPTX::BI__nvvm_atom_release_add_shared_i:
21898  case NVPTX::BI__nvvm_atom_release_add_shared_l:
21899  case NVPTX::BI__nvvm_atom_release_add_shared_ll:
21900  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_release, *this, E);
21901  case NVPTX::BI__nvvm_atom_release_add_shared_f:
21902  case NVPTX::BI__nvvm_atom_release_add_shared_d:
21903  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_release, *this, E);
21904  case NVPTX::BI__nvvm_atom_release_xchg_shared_i:
21905  case NVPTX::BI__nvvm_atom_release_xchg_shared_l:
21906  case NVPTX::BI__nvvm_atom_release_xchg_shared_ll:
21907  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_release, *this, E);
21908  case NVPTX::BI__nvvm_atom_release_xchg_shared_f:
21909  case NVPTX::BI__nvvm_atom_release_xchg_shared_d:
21910  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_release, *this, E);
21911  case NVPTX::BI__nvvm_atom_release_max_shared_i:
21912  case NVPTX::BI__nvvm_atom_release_max_shared_l:
21913  case NVPTX::BI__nvvm_atom_release_max_shared_ll:
21914  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_release, *this, E);
21915  case NVPTX::BI__nvvm_atom_release_max_shared_ui:
21916  case NVPTX::BI__nvvm_atom_release_max_shared_ul:
21917  case NVPTX::BI__nvvm_atom_release_max_shared_ull:
21918  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_release, *this, E);
21919  case NVPTX::BI__nvvm_atom_release_min_shared_i:
21920  case NVPTX::BI__nvvm_atom_release_min_shared_l:
21921  case NVPTX::BI__nvvm_atom_release_min_shared_ll:
21922  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_release, *this, E);
21923  case NVPTX::BI__nvvm_atom_release_min_shared_ui:
21924  case NVPTX::BI__nvvm_atom_release_min_shared_ul:
21925  case NVPTX::BI__nvvm_atom_release_min_shared_ull:
21926  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_release, *this, E);
21927  case NVPTX::BI__nvvm_atom_release_inc_shared_ui:
21928  case NVPTX::BI__nvvm_atom_release_inc_shared_ul:
21929  case NVPTX::BI__nvvm_atom_release_inc_shared_ull:
21930  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_release, *this, E);
21931  case NVPTX::BI__nvvm_atom_release_dec_shared_ui:
21932  case NVPTX::BI__nvvm_atom_release_dec_shared_ul:
21933  case NVPTX::BI__nvvm_atom_release_dec_shared_ull:
21934  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_release, *this, E);
21935  case NVPTX::BI__nvvm_atom_release_and_shared_i:
21936  case NVPTX::BI__nvvm_atom_release_and_shared_l:
21937  case NVPTX::BI__nvvm_atom_release_and_shared_ll:
21938  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_release, *this, E);
21939  case NVPTX::BI__nvvm_atom_release_or_shared_i:
21940  case NVPTX::BI__nvvm_atom_release_or_shared_l:
21941  case NVPTX::BI__nvvm_atom_release_or_shared_ll:
21942  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_release, *this, E);
21943  case NVPTX::BI__nvvm_atom_release_xor_shared_i:
21944  case NVPTX::BI__nvvm_atom_release_xor_shared_l:
21945  case NVPTX::BI__nvvm_atom_release_xor_shared_ll:
21946  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_release, *this, E);
21947  case NVPTX::BI__nvvm_atom_release_cas_shared_i:
21948  case NVPTX::BI__nvvm_atom_release_cas_shared_l:
21949  case NVPTX::BI__nvvm_atom_release_cas_shared_ll:
21950  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_release);
21951  case NVPTX::BI__nvvm_atom_release_cas_shared_f:
21952  case NVPTX::BI__nvvm_atom_release_cas_shared_d:
21953  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_release);
21954  case NVPTX::BI__nvvm_atom_release_cta_add_shared_i:
21955  case NVPTX::BI__nvvm_atom_release_cta_add_shared_l:
21956  case NVPTX::BI__nvvm_atom_release_cta_add_shared_ll:
21957  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_release_cta, *this, E);
21958  case NVPTX::BI__nvvm_atom_release_sys_add_shared_i:
21959  case NVPTX::BI__nvvm_atom_release_sys_add_shared_l:
21960  case NVPTX::BI__nvvm_atom_release_sys_add_shared_ll:
21961  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_release_sys, *this, E);
21962  case NVPTX::BI__nvvm_atom_release_cta_add_shared_f:
21963  case NVPTX::BI__nvvm_atom_release_cta_add_shared_d:
21964  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_release_cta, *this, E);
21965  case NVPTX::BI__nvvm_atom_release_sys_add_shared_f:
21966  case NVPTX::BI__nvvm_atom_release_sys_add_shared_d:
21967  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_release_sys, *this, E);
21968  case NVPTX::BI__nvvm_atom_release_cta_xchg_shared_i:
21969  case NVPTX::BI__nvvm_atom_release_cta_xchg_shared_l:
21970  case NVPTX::BI__nvvm_atom_release_cta_xchg_shared_ll:
21971  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_release_cta, *this, E);
21972  case NVPTX::BI__nvvm_atom_release_cta_xchg_shared_f:
21973  case NVPTX::BI__nvvm_atom_release_cta_xchg_shared_d:
21974  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_release_cta, *this, E);
21975  case NVPTX::BI__nvvm_atom_release_sys_xchg_shared_i:
21976  case NVPTX::BI__nvvm_atom_release_sys_xchg_shared_l:
21977  case NVPTX::BI__nvvm_atom_release_sys_xchg_shared_ll:
21978  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_release_sys, *this, E);
21979  case NVPTX::BI__nvvm_atom_release_sys_xchg_shared_f:
21980  case NVPTX::BI__nvvm_atom_release_sys_xchg_shared_d:
21981  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_release_sys, *this, E);
21982  case NVPTX::BI__nvvm_atom_release_cta_max_shared_i:
21983  case NVPTX::BI__nvvm_atom_release_cta_max_shared_l:
21984  case NVPTX::BI__nvvm_atom_release_cta_max_shared_ll:
21985  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_release_cta, *this, E);
21986  case NVPTX::BI__nvvm_atom_release_cta_max_shared_ui:
21987  case NVPTX::BI__nvvm_atom_release_cta_max_shared_ul:
21988  case NVPTX::BI__nvvm_atom_release_cta_max_shared_ull:
21989  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_release_cta, *this, E);
21990  case NVPTX::BI__nvvm_atom_release_sys_max_shared_i:
21991  case NVPTX::BI__nvvm_atom_release_sys_max_shared_l:
21992  case NVPTX::BI__nvvm_atom_release_sys_max_shared_ll:
21993  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_release_sys, *this, E);
21994  case NVPTX::BI__nvvm_atom_release_sys_max_shared_ui:
21995  case NVPTX::BI__nvvm_atom_release_sys_max_shared_ul:
21996  case NVPTX::BI__nvvm_atom_release_sys_max_shared_ull:
21997  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_release_sys, *this, E);
21998  case NVPTX::BI__nvvm_atom_release_cta_min_shared_i:
21999  case NVPTX::BI__nvvm_atom_release_cta_min_shared_l:
22000  case NVPTX::BI__nvvm_atom_release_cta_min_shared_ll:
22001  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_release_cta, *this, E);
22002  case NVPTX::BI__nvvm_atom_release_cta_min_shared_ui:
22003  case NVPTX::BI__nvvm_atom_release_cta_min_shared_ul:
22004  case NVPTX::BI__nvvm_atom_release_cta_min_shared_ull:
22005  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_release_cta, *this, E);
22006  case NVPTX::BI__nvvm_atom_release_sys_min_shared_i:
22007  case NVPTX::BI__nvvm_atom_release_sys_min_shared_l:
22008  case NVPTX::BI__nvvm_atom_release_sys_min_shared_ll:
22009  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_release_sys, *this, E);
22010  case NVPTX::BI__nvvm_atom_release_sys_min_shared_ui:
22011  case NVPTX::BI__nvvm_atom_release_sys_min_shared_ul:
22012  case NVPTX::BI__nvvm_atom_release_sys_min_shared_ull:
22013  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_release_sys, *this, E);
22014  case NVPTX::BI__nvvm_atom_release_cta_inc_shared_ui:
22015  case NVPTX::BI__nvvm_atom_release_cta_inc_shared_ul:
22016  case NVPTX::BI__nvvm_atom_release_cta_inc_shared_ull:
22017  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_release_cta, *this, E);
22018  case NVPTX::BI__nvvm_atom_release_cta_dec_shared_ui:
22019  case NVPTX::BI__nvvm_atom_release_cta_dec_shared_ul:
22020  case NVPTX::BI__nvvm_atom_release_cta_dec_shared_ull:
22021  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_release_cta, *this, E);
22022  case NVPTX::BI__nvvm_atom_release_sys_inc_shared_ui:
22023  case NVPTX::BI__nvvm_atom_release_sys_inc_shared_ul:
22024  case NVPTX::BI__nvvm_atom_release_sys_inc_shared_ull:
22025  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_release_sys, *this, E);
22026  case NVPTX::BI__nvvm_atom_release_sys_dec_shared_ui:
22027  case NVPTX::BI__nvvm_atom_release_sys_dec_shared_ul:
22028  case NVPTX::BI__nvvm_atom_release_sys_dec_shared_ull:
22029  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_release_sys, *this, E);
22030  case NVPTX::BI__nvvm_atom_release_cta_and_shared_i:
22031  case NVPTX::BI__nvvm_atom_release_cta_and_shared_l:
22032  case NVPTX::BI__nvvm_atom_release_cta_and_shared_ll:
22033  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_release_cta, *this, E);
22034  case NVPTX::BI__nvvm_atom_release_sys_and_shared_i:
22035  case NVPTX::BI__nvvm_atom_release_sys_and_shared_l:
22036  case NVPTX::BI__nvvm_atom_release_sys_and_shared_ll:
22037  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_release_sys, *this, E);
22038  case NVPTX::BI__nvvm_atom_release_cta_or_shared_i:
22039  case NVPTX::BI__nvvm_atom_release_cta_or_shared_l:
22040  case NVPTX::BI__nvvm_atom_release_cta_or_shared_ll:
22041  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_release_cta, *this, E);
22042  case NVPTX::BI__nvvm_atom_release_sys_or_shared_i:
22043  case NVPTX::BI__nvvm_atom_release_sys_or_shared_l:
22044  case NVPTX::BI__nvvm_atom_release_sys_or_shared_ll:
22045  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_release_sys, *this, E);
22046  case NVPTX::BI__nvvm_atom_release_cta_xor_shared_i:
22047  case NVPTX::BI__nvvm_atom_release_cta_xor_shared_l:
22048  case NVPTX::BI__nvvm_atom_release_cta_xor_shared_ll:
22049  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_release_cta, *this, E);
22050  case NVPTX::BI__nvvm_atom_release_sys_xor_shared_i:
22051  case NVPTX::BI__nvvm_atom_release_sys_xor_shared_l:
22052  case NVPTX::BI__nvvm_atom_release_sys_xor_shared_ll:
22053  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_release_sys, *this, E);
22054  case NVPTX::BI__nvvm_atom_release_cta_cas_shared_i:
22055  case NVPTX::BI__nvvm_atom_release_cta_cas_shared_l:
22056  case NVPTX::BI__nvvm_atom_release_cta_cas_shared_ll:
22057  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_release_cta);
22058  case NVPTX::BI__nvvm_atom_release_sys_cas_shared_i:
22059  case NVPTX::BI__nvvm_atom_release_sys_cas_shared_l:
22060  case NVPTX::BI__nvvm_atom_release_sys_cas_shared_ll:
22061  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_release_sys);
22062  case NVPTX::BI__nvvm_atom_release_cta_cas_shared_f:
22063  case NVPTX::BI__nvvm_atom_release_cta_cas_shared_d:
22064  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_release_cta);
22065  case NVPTX::BI__nvvm_atom_release_sys_cas_shared_f:
22066  case NVPTX::BI__nvvm_atom_release_sys_cas_shared_d:
22067  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_release_sys);
22068  case NVPTX::BI__nvvm_atom_acq_rel_add_shared_i:
22069  case NVPTX::BI__nvvm_atom_acq_rel_add_shared_l:
22070  case NVPTX::BI__nvvm_atom_acq_rel_add_shared_ll:
22071  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acq_rel, *this, E);
22072  case NVPTX::BI__nvvm_atom_acq_rel_add_shared_f:
22073  case NVPTX::BI__nvvm_atom_acq_rel_add_shared_d:
22074  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acq_rel, *this, E);
22075  case NVPTX::BI__nvvm_atom_acq_rel_xchg_shared_i:
22076  case NVPTX::BI__nvvm_atom_acq_rel_xchg_shared_l:
22077  case NVPTX::BI__nvvm_atom_acq_rel_xchg_shared_ll:
22078  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acq_rel, *this, E);
22079  case NVPTX::BI__nvvm_atom_acq_rel_xchg_shared_f:
22080  case NVPTX::BI__nvvm_atom_acq_rel_xchg_shared_d:
22081  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acq_rel, *this, E);
22082  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_i:
22083  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_l:
22084  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_ll:
22085  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acq_rel, *this, E);
22086  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_ui:
22087  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_ul:
22088  case NVPTX::BI__nvvm_atom_acq_rel_max_shared_ull:
22089  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acq_rel, *this, E);
22090  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_i:
22091  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_l:
22092  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_ll:
22093  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acq_rel, *this, E);
22094  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_ui:
22095  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_ul:
22096  case NVPTX::BI__nvvm_atom_acq_rel_min_shared_ull:
22097  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acq_rel, *this, E);
22098  case NVPTX::BI__nvvm_atom_acq_rel_inc_shared_ui:
22099  case NVPTX::BI__nvvm_atom_acq_rel_inc_shared_ul:
22100  case NVPTX::BI__nvvm_atom_acq_rel_inc_shared_ull:
22101  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acq_rel, *this, E);
22102  case NVPTX::BI__nvvm_atom_acq_rel_dec_shared_ui:
22103  case NVPTX::BI__nvvm_atom_acq_rel_dec_shared_ul:
22104  case NVPTX::BI__nvvm_atom_acq_rel_dec_shared_ull:
22105  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acq_rel, *this, E);
22106  case NVPTX::BI__nvvm_atom_acq_rel_and_shared_i:
22107  case NVPTX::BI__nvvm_atom_acq_rel_and_shared_l:
22108  case NVPTX::BI__nvvm_atom_acq_rel_and_shared_ll:
22109  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acq_rel, *this, E);
22110  case NVPTX::BI__nvvm_atom_acq_rel_or_shared_i:
22111  case NVPTX::BI__nvvm_atom_acq_rel_or_shared_l:
22112  case NVPTX::BI__nvvm_atom_acq_rel_or_shared_ll:
22113  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acq_rel, *this, E);
22114  case NVPTX::BI__nvvm_atom_acq_rel_xor_shared_i:
22115  case NVPTX::BI__nvvm_atom_acq_rel_xor_shared_l:
22116  case NVPTX::BI__nvvm_atom_acq_rel_xor_shared_ll:
22117  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acq_rel, *this, E);
22118  case NVPTX::BI__nvvm_atom_acq_rel_cas_shared_i:
22119  case NVPTX::BI__nvvm_atom_acq_rel_cas_shared_l:
22120  case NVPTX::BI__nvvm_atom_acq_rel_cas_shared_ll:
22121  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acq_rel);
22122  case NVPTX::BI__nvvm_atom_acq_rel_cas_shared_f:
22123  case NVPTX::BI__nvvm_atom_acq_rel_cas_shared_d:
22124  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acq_rel);
22125  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_shared_i:
22126  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_shared_l:
22127  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_shared_ll:
22128  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acq_rel_cta, *this, E);
22129  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_shared_i:
22130  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_shared_l:
22131  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_shared_ll:
22132  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_i_acq_rel_sys, *this, E);
22133  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_shared_f:
22134  case NVPTX::BI__nvvm_atom_acq_rel_cta_add_shared_d:
22135  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acq_rel_cta, *this, E);
22136  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_shared_f:
22137  case NVPTX::BI__nvvm_atom_acq_rel_sys_add_shared_d:
22138  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_shared_f_acq_rel_sys, *this, E);
22139  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_shared_i:
22140  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_shared_l:
22141  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_shared_ll:
22142  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acq_rel_cta, *this, E);
22143  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_shared_f:
22144  case NVPTX::BI__nvvm_atom_acq_rel_cta_xchg_shared_d:
22145  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acq_rel_cta, *this, E);
22146  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_shared_i:
22147  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_shared_l:
22148  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_shared_ll:
22149  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_i_acq_rel_sys, *this, E);
22150  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_shared_f:
22151  case NVPTX::BI__nvvm_atom_acq_rel_sys_xchg_shared_d:
22152  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_shared_f_acq_rel_sys, *this, E);
22153  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_i:
22154  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_l:
22155  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_ll:
22156  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acq_rel_cta, *this, E);
22157  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_ui:
22158  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_ul:
22159  case NVPTX::BI__nvvm_atom_acq_rel_cta_max_shared_ull:
22160  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acq_rel_cta, *this, E);
22161  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_i:
22162  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_l:
22163  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_ll:
22164  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_i_acq_rel_sys, *this, E);
22165  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_ui:
22166  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_ul:
22167  case NVPTX::BI__nvvm_atom_acq_rel_sys_max_shared_ull:
22168  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_shared_ui_acq_rel_sys, *this, E);
22169  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_i:
22170  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_l:
22171  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_ll:
22172  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acq_rel_cta, *this, E);
22173  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_ui:
22174  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_ul:
22175  case NVPTX::BI__nvvm_atom_acq_rel_cta_min_shared_ull:
22176  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acq_rel_cta, *this, E);
22177  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_i:
22178  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_l:
22179  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_ll:
22180  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_i_acq_rel_sys, *this, E);
22181  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_ui:
22182  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_ul:
22183  case NVPTX::BI__nvvm_atom_acq_rel_sys_min_shared_ull:
22184  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_shared_ui_acq_rel_sys, *this, E);
22185  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_shared_ui:
22186  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_shared_ul:
22187  case NVPTX::BI__nvvm_atom_acq_rel_cta_inc_shared_ull:
22188  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acq_rel_cta, *this, E);
22189  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_shared_ui:
22190  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_shared_ul:
22191  case NVPTX::BI__nvvm_atom_acq_rel_cta_dec_shared_ull:
22192  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acq_rel_cta, *this, E);
22193  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_shared_ui:
22194  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_shared_ul:
22195  case NVPTX::BI__nvvm_atom_acq_rel_sys_inc_shared_ull:
22196  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_shared_i_acq_rel_sys, *this, E);
22197  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_shared_ui:
22198  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_shared_ul:
22199  case NVPTX::BI__nvvm_atom_acq_rel_sys_dec_shared_ull:
22200  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_shared_i_acq_rel_sys, *this, E);
22201  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_shared_i:
22202  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_shared_l:
22203  case NVPTX::BI__nvvm_atom_acq_rel_cta_and_shared_ll:
22204  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acq_rel_cta, *this, E);
22205  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_shared_i:
22206  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_shared_l:
22207  case NVPTX::BI__nvvm_atom_acq_rel_sys_and_shared_ll:
22208  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_shared_i_acq_rel_sys, *this, E);
22209  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_shared_i:
22210  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_shared_l:
22211  case NVPTX::BI__nvvm_atom_acq_rel_cta_or_shared_ll:
22212  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acq_rel_cta, *this, E);
22213  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_shared_i:
22214  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_shared_l:
22215  case NVPTX::BI__nvvm_atom_acq_rel_sys_or_shared_ll:
22216  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_shared_i_acq_rel_sys, *this, E);
22217  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_shared_i:
22218  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_shared_l:
22219  case NVPTX::BI__nvvm_atom_acq_rel_cta_xor_shared_ll:
22220  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acq_rel_cta, *this, E);
22221  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_shared_i:
22222  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_shared_l:
22223  case NVPTX::BI__nvvm_atom_acq_rel_sys_xor_shared_ll:
22224  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_shared_i_acq_rel_sys, *this, E);
22225  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_shared_i:
22226  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_shared_l:
22227  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_shared_ll:
22228  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acq_rel_cta);
22229  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_shared_i:
22230  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_shared_l:
22231  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_shared_ll:
22232  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_i_acq_rel_sys);
22233  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_shared_f:
22234  case NVPTX::BI__nvvm_atom_acq_rel_cta_cas_shared_d:
22235  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acq_rel_cta);
22236  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_shared_f:
22237  case NVPTX::BI__nvvm_atom_acq_rel_sys_cas_shared_d:
22238  return MakeScopedCasAtomic(Intrinsic::nvvm_atomic_cas_shared_f_acq_rel_sys);
22239  case NVPTX::BI__nvvm_match_all_sync_i32p:
22240  case NVPTX::BI__nvvm_match_all_sync_i64p: {
22241  Value *Mask = EmitScalarExpr(E->getArg(0));
22242  Value *Val = EmitScalarExpr(E->getArg(1));
22243  Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
22244  Value *ResultPair = Builder.CreateCall(
22245  CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
22246  ? Intrinsic::nvvm_match_all_sync_i32p
22247  : Intrinsic::nvvm_match_all_sync_i64p),
22248  {Mask, Val});
22249  Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
22250  PredOutPtr.getElementType());
22251  Builder.CreateStore(Pred, PredOutPtr);
22252  return Builder.CreateExtractValue(ResultPair, 0);
22253  }
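// Caller-side sketch for the match.all builtins (assumed CUDA source): the
// intrinsic returns a pair whose element 0 is the match result (the lane
// mask, or 0 if the lanes disagree) and whose element 1 is the "all equal"
// predicate that the code above widens and stores through the third arg.
//   int pred;
//   unsigned mask = __nvvm_match_all_sync_i32p(__activemask(), key, &pred);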
22254 
22255  // FP MMA loads.
22256  case NVPTX::BI__hmma_m16n16k16_ld_a:
22257  case NVPTX::BI__hmma_m16n16k16_ld_b:
22258  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
22259  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
22260  case NVPTX::BI__hmma_m32n8k16_ld_a:
22261  case NVPTX::BI__hmma_m32n8k16_ld_b:
22262  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
22263  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
22264  case NVPTX::BI__hmma_m8n32k16_ld_a:
22265  case NVPTX::BI__hmma_m8n32k16_ld_b:
22266  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
22267  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
22268  // Integer MMA loads.
22269  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
22270  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
22271  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
22272  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
22273  case NVPTX::BI__imma_m16n16k16_ld_c:
22274  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
22275  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
22276  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
22277  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
22278  case NVPTX::BI__imma_m32n8k16_ld_c:
22279  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
22280  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
22281  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
22282  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
22283  case NVPTX::BI__imma_m8n32k16_ld_c:
22284  // Sub-integer MMA loads.
22285  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
22286  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
22287  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
22288  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
22289  case NVPTX::BI__imma_m8n8k32_ld_c:
22290  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
22291  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
22292  case NVPTX::BI__bmma_m8n8k128_ld_c:
22293  // Double MMA loads.
22294  case NVPTX::BI__dmma_m8n8k4_ld_a:
22295  case NVPTX::BI__dmma_m8n8k4_ld_b:
22296  case NVPTX::BI__dmma_m8n8k4_ld_c:
22297  // Alternate float MMA loads.
22298  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
22299  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
22300  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
22301  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
22302  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
22303  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
22304  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
22305  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
22306  case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
22307  Address Dst = EmitPointerWithAlignment(E->getArg(0));
22308  Value *Src = EmitScalarExpr(E->getArg(1));
22309  Value *Ldm = EmitScalarExpr(E->getArg(2));
22310  std::optional<llvm::APSInt> isColMajorArg =
22311  E->getArg(3)->getIntegerConstantExpr(getContext());
22312  if (!isColMajorArg)
22313  return nullptr;
22314  bool isColMajor = isColMajorArg->getSExtValue();
22315  NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
22316  unsigned IID = isColMajor ? II.IID_col : II.IID_row;
22317  if (IID == 0)
22318  return nullptr;
22319 
22320  Value *Result =
22321  Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
22322 
22323  // Save returned values.
22324  assert(II.NumResults);
22325  if (II.NumResults == 1) {
22326  Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
22327  CharUnits::fromQuantity(4).getAsAlign());
22328  } else {
22329  for (unsigned i = 0; i < II.NumResults; ++i) {
22330  Builder.CreateAlignedStore(
22331  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
22332  Dst.getElementType()),
22333  Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
22334  llvm::ConstantInt::get(IntTy, i)),
22335  CharUnits::fromQuantity(4).getAsAlign());
22336  }
22337  }
22338  return Result;
22339  }
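// Caller-side sketch for the MMA loads (assumed CUDA source): the builtin
// takes a destination fragment pointer, a source pointer, the leading
// dimension, and a constant layout flag. A non-constant layout makes
// getIntegerConstantExpr() fail above, so the call returns nullptr.
//   __hmma_m16n16k16_ld_a(fragA, srcPtr, /*ldm=*/16, /*isColMajor=*/0);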
22340 
22341  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
22342  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
22343  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
22344  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
22345  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
22346  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
22347  case NVPTX::BI__imma_m16n16k16_st_c_i32:
22348  case NVPTX::BI__imma_m32n8k16_st_c_i32:
22349  case NVPTX::BI__imma_m8n32k16_st_c_i32:
22350  case NVPTX::BI__imma_m8n8k32_st_c_i32:
22351  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
22352  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
22353  case NVPTX::BI__mma_m16n16k8_st_c_f32: {
22354  Value *Dst = EmitScalarExpr(E->getArg(0));
22355  Address Src = EmitPointerWithAlignment(E->getArg(1));
22356  Value *Ldm = EmitScalarExpr(E->getArg(2));
22357  std::optional<llvm::APSInt> isColMajorArg =
22358  E->getArg(3)->getIntegerConstantExpr(getContext());
22359  if (!isColMajorArg)
22360  return nullptr;
22361  bool isColMajor = isColMajorArg->getSExtValue();
22362  NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
22363  unsigned IID = isColMajor ? II.IID_col : II.IID_row;
22364  if (IID == 0)
22365  return nullptr;
22366  Function *Intrinsic =
22367  CGM.getIntrinsic(IID, Dst->getType());
22368  llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
22369  SmallVector<Value *, 10> Values = {Dst};
22370  for (unsigned i = 0; i < II.NumResults; ++i) {
22371  Value *V = Builder.CreateAlignedLoad(
22372  Src.getElementType(),
22373  Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
22374  llvm::ConstantInt::get(IntTy, i)),
22375  CharUnits::fromQuantity(4).getAsAlign());
22376  Values.push_back(Builder.CreateBitCast(V, ParamType));
22377  }
22378  Values.push_back(Ldm);
22379  Value *Result = Builder.CreateCall(Intrinsic, Values);
22380  return Result;
22381  }
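// The store path mirrors the load path: fragment elements are loaded from
// Src, bitcast to the intrinsic's parameter type, and passed by value.
// Caller-side sketch (assumed CUDA source):
//   __hmma_m16n16k16_st_c_f32(dstPtr, fragC, /*ldm=*/16, /*isColMajor=*/0);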
22382 
22383  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
22384  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
22385  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
22386  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
22387  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
22388  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
22389  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
22390  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
22391  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
22392  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
22393  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
22394  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
22395  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
22396  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
22397  case NVPTX::BI__imma_m16n16k16_mma_s8:
22398  case NVPTX::BI__imma_m16n16k16_mma_u8:
22399  case NVPTX::BI__imma_m32n8k16_mma_s8:
22400  case NVPTX::BI__imma_m32n8k16_mma_u8:
22401  case NVPTX::BI__imma_m8n32k16_mma_s8:
22402  case NVPTX::BI__imma_m8n32k16_mma_u8:
22403  case NVPTX::BI__imma_m8n8k32_mma_s4:
22404  case NVPTX::BI__imma_m8n8k32_mma_u4:
22405  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
22406  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
22407  case NVPTX::BI__dmma_m8n8k4_mma_f64:
22408  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
22409  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
22410  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
22411  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
22412  Address Dst = EmitPointerWithAlignment(E->getArg(0));
22413  Address SrcA = EmitPointerWithAlignment(E->getArg(1));
22414  Address SrcB = EmitPointerWithAlignment(E->getArg(2));
22415  Address SrcC = EmitPointerWithAlignment(E->getArg(3));
22416  std::optional<llvm::APSInt> LayoutArg =
22417  E->getArg(4)->getIntegerConstantExpr(getContext());
22418  if (!LayoutArg)
22419  return nullptr;
22420  int Layout = LayoutArg->getSExtValue();
22421  if (Layout < 0 || Layout > 3)
22422  return nullptr;
22423  llvm::APSInt SatfArg;
22424  if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
22425  BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
22426  SatfArg = 0; // .b1 does not have satf argument.
22427  else if (std::optional<llvm::APSInt> OptSatfArg =
22428  E->getArg(5)->getIntegerConstantExpr(getContext()))
22429  SatfArg = *OptSatfArg;
22430  else
22431  return nullptr;
22432  bool Satf = SatfArg.getSExtValue();
22433  NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
22434  unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
22435  if (IID == 0) // Unsupported combination of Layout/Satf.
22436  return nullptr;
22437 
22438  SmallVector<Value *, 24> Values;
22439  Function *Intrinsic = CGM.getIntrinsic(IID);
22440  llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
22441  // Load A
22442  for (unsigned i = 0; i < MI.NumEltsA; ++i) {
22443  Value *V = Builder.CreateAlignedLoad(
22444  SrcA.getElementType(),
22445  Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
22446  llvm::ConstantInt::get(IntTy, i)),
22447  CharUnits::fromQuantity(4).getAsAlign());
22448  Values.push_back(Builder.CreateBitCast(V, AType));
22449  }
22450  // Load B
22451  llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
22452  for (unsigned i = 0; i < MI.NumEltsB; ++i) {
22453  Value *V = Builder.CreateAlignedLoad(
22454  SrcB.getElementType(),
22455  Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
22456  llvm::ConstantInt::get(IntTy, i)),
22457  CharUnits::fromQuantity(4).getAsAlign());
22458  Values.push_back(Builder.CreateBitCast(V, BType));
22459  }
22460  // Load C
22461  llvm::Type *CType =
22462  Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
22463  for (unsigned i = 0; i < MI.NumEltsC; ++i) {
22464  Value *V = Builder.CreateAlignedLoad(
22465  SrcC.getElementType(),
22466  Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
22467  llvm::ConstantInt::get(IntTy, i)),
22468  CharUnits::fromQuantity(4).getAsAlign());
22469  Values.push_back(Builder.CreateBitCast(V, CType));
22470  }
22471  Value *Result = Builder.CreateCall(Intrinsic, Values);
22472  llvm::Type *DType = Dst.getElementType();
22473  for (unsigned i = 0; i < MI.NumEltsD; ++i)
22474  Builder.CreateAlignedStore(
22475  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
22476  Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
22477  llvm::ConstantInt::get(IntTy, i)),
22478  CharUnits::fromQuantity(4).getAsAlign());
22479  return Result;
22480  }
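// The constant layout argument packs the majorness of A and B into two bits
// (0 = row/row, 1 = row/col, 2 = col/row, 3 = col/col), hence the [0, 3]
// range check above. Caller-side sketch (assumed CUDA source):
//   __hmma_m16n16k16_mma_f32f32(d, a, b, c, /*layout=*/0, /*satf=*/0);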
22481  // The following builtins require half type support.
22482  case NVPTX::BI__nvvm_ex2_approx_f16:
22483  return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
22484  case NVPTX::BI__nvvm_ex2_approx_f16x2:
22485  return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
22486  case NVPTX::BI__nvvm_ff2f16x2_rn:
22487  return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
22488  case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
22489  return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
22490  case NVPTX::BI__nvvm_ff2f16x2_rz:
22491  return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
22492  case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
22493  return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
22494  case NVPTX::BI__nvvm_fma_rn_f16:
22495  return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
22496  case NVPTX::BI__nvvm_fma_rn_f16x2:
22497  return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
22498  case NVPTX::BI__nvvm_fma_rn_ftz_f16:
22499  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
22500  case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
22501  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
22502  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
22503  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
22504  *this);
22505  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
22506  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
22507  *this);
22508  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
22509  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
22510  *this);
22511  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
22512  return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
22513  *this);
22514  case NVPTX::BI__nvvm_fma_rn_relu_f16:
22515  return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
22516  case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
22517  return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
22518  case NVPTX::BI__nvvm_fma_rn_sat_f16:
22519  return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
22520  case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
22521  return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
22522  case NVPTX::BI__nvvm_fmax_f16:
22523  return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
22524  case NVPTX::BI__nvvm_fmax_f16x2:
22525  return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
22526  case NVPTX::BI__nvvm_fmax_ftz_f16:
22527  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
22528  case NVPTX::BI__nvvm_fmax_ftz_f16x2:
22529  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
22530  case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
22531  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
22532  case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
22533  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
22534  *this);
22535  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
22536  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
22537  E, *this);
22538  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
22539  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
22540  BuiltinID, E, *this);
22541  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
22542  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
22543  *this);
22544  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
22545  return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
22546  E, *this);
22547  case NVPTX::BI__nvvm_fmax_nan_f16:
22548  return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
22549  case NVPTX::BI__nvvm_fmax_nan_f16x2:
22550  return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
22551  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
22552  return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
22553  *this);
22554  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
22555  return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
22556  E, *this);
22557  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
22558  return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
22559  *this);
22560  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
22561  return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
22562  *this);
22563  case NVPTX::BI__nvvm_fmin_f16:
22564  return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
22565  case NVPTX::BI__nvvm_fmin_f16x2:
22566  return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
22567  case NVPTX::BI__nvvm_fmin_ftz_f16:
22568  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
22569  case NVPTX::BI__nvvm_fmin_ftz_f16x2:
22570  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
22571  case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
22572  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
22573  case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
22574  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
22575  *this);
22576  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
22577  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
22578  E, *this);
22579  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
22580  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
22581  BuiltinID, E, *this);
22582  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
22583  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
22584  *this);
22585  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
22586  return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
22587  E, *this);
22588  case NVPTX::BI__nvvm_fmin_nan_f16:
22589  return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
22590  case NVPTX::BI__nvvm_fmin_nan_f16x2:
22591  return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
22592  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
22593  return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
22594  *this);
22595  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
22596  return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
22597  E, *this);
22598  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
22599  return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
22600  *this);
22601  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
22602  return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
22603  *this);
22604  case NVPTX::BI__nvvm_ldg_h:
22605  return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
22606  case NVPTX::BI__nvvm_ldg_h2:
22607  return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
22608  case NVPTX::BI__nvvm_ldu_h:
22609  return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
22610  case NVPTX::BI__nvvm_ldu_h2: {
22611  return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
22612  }
22613  case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
22614  return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
22615  Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
22616  4);
22617  case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
22618  return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
22619  Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
22620  8);
22621  case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
22622  return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
22623  Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
22624  16);
22625  case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
22626  return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
22627  Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
22628  16);
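// cp.async sketch (assumed CUDA source): copies 4, 8, or 16 bytes from
// global to shared memory asynchronously. MakeCpAsync picks the _s variant
// of the intrinsic when the builtin is passed the optional src-size operand.
//   __nvvm_cp_async_ca_shared_global_16(dstShared, srcGlobal);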
22629  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
22630  return Builder.CreateCall(
22631  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
22632  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
22633  return Builder.CreateCall(
22634  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
22635  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
22636  return Builder.CreateCall(
22637  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
22638  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
22639  return Builder.CreateCall(
22640  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
22641  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
22642  return Builder.CreateCall(
22643  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
22644  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
22645  return Builder.CreateCall(
22646  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
22647  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
22648  return Builder.CreateCall(
22649  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
22650  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
22651  return Builder.CreateCall(
22652  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
22653  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
22654  return Builder.CreateCall(
22655  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
22656  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
22657  return Builder.CreateCall(
22658  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
22659  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
22660  return Builder.CreateCall(
22661  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
22662  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
22663  return Builder.CreateCall(
22664  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
22665  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
22666  return Builder.CreateCall(
22667  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
22668  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
22669  return Builder.CreateCall(
22670  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
22671  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
22672  return Builder.CreateCall(
22673  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
22674  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
22675  return Builder.CreateCall(
22676  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
22677  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
22678  return Builder.CreateCall(
22679  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
22680  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
22681  return Builder.CreateCall(
22682  CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
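// The cluster builtins above map one-to-one onto PTX special registers,
// e.g. (assumed CUDA source):
//   unsigned cid = __nvvm_read_ptx_sreg_clusterid_x(); // reads %clusterid.x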
22683  case NVPTX::BI__nvvm_is_explicit_cluster:
22684  return Builder.CreateCall(
22685  CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
22686  case NVPTX::BI__nvvm_isspacep_shared_cluster:
22687  return Builder.CreateCall(
22688  CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
22689  EmitScalarExpr(E->getArg(0)));
22690  case NVPTX::BI__nvvm_mapa:
22691  return Builder.CreateCall(
22692  CGM.getIntrinsic(Intrinsic::nvvm_mapa),
22693  {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22694  case NVPTX::BI__nvvm_mapa_shared_cluster:
22695  return Builder.CreateCall(
22696  CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
22697  {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22698  case NVPTX::BI__nvvm_getctarank:
22699  return Builder.CreateCall(
22700  CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
22701  EmitScalarExpr(E->getArg(0)));
22702  case NVPTX::BI__nvvm_getctarank_shared_cluster:
22703  return Builder.CreateCall(
22704  CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
22705  EmitScalarExpr(E->getArg(0)));
22706  case NVPTX::BI__nvvm_barrier_cluster_arrive:
22707  return Builder.CreateCall(
22708  CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
22709  case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
22710  return Builder.CreateCall(
22711  CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
22712  case NVPTX::BI__nvvm_barrier_cluster_wait:
22713  return Builder.CreateCall(
22714  CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
22715  case NVPTX::BI__nvvm_fence_sc_cluster:
22716  return Builder.CreateCall(
22717  CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
22718  default:
22719  return nullptr;
22720  }
22721 }
22722 
22723 namespace {
22724 struct BuiltinAlignArgs {
22725  llvm::Value *Src = nullptr;
22726  llvm::Type *SrcType = nullptr;
22727  llvm::Value *Alignment = nullptr;
22728  llvm::Value *Mask = nullptr;
22729  llvm::IntegerType *IntType = nullptr;
22730 
22731  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22732  QualType AstType = E->getArg(0)->getType();
22733  if (AstType->isArrayType())
22734  Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22735  else
22736  Src = CGF.EmitScalarExpr(E->getArg(0));
22737  SrcType = Src->getType();
22738  if (SrcType->isPointerTy()) {
22739  IntType = IntegerType::get(
22740  CGF.getLLVMContext(),
22741  CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22742  } else {
22743  assert(SrcType->isIntegerTy());
22744  IntType = cast<llvm::IntegerType>(SrcType);
22745  }
22746  Alignment = CGF.EmitScalarExpr(E->getArg(1));
22747  Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22748  auto *One = llvm::ConstantInt::get(IntType, 1);
22749  Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22750  }
22751 };
22752 } // namespace
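// BuiltinAlignArgs sketch: for __builtin_align_up(p, 16) it computes
//   Alignment = 16, zero-extended/truncated to the pointer's index width,
//   Mask      = 15 (Alignment - 1),
// on the assumption that the alignment is a power of two (Sema diagnoses
// constant arguments that are not).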
22753 
22754 /// Generate (x & (y-1)) == 0.
22755 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22756  BuiltinAlignArgs Args(E, *this);
22757  llvm::Value *SrcAddress = Args.Src;
22758  if (Args.SrcType->isPointerTy())
22759  SrcAddress =
22760  Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22761  return RValue::get(Builder.CreateICmpEQ(
22762  Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22763  llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22764 }
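// Rough shape of the IR emitted for __builtin_is_aligned(p, 16) on a
// pointer argument (a sketch, values abbreviated):
//   %src_addr   = ptrtoint ptr %p to i64
//   %set_bits   = and i64 %src_addr, 15
//   %is_aligned = icmp eq i64 %set_bits, 0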
22765 
22766 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22767 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22768 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22769 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22770  BuiltinAlignArgs Args(E, *this);
22771  llvm::Value *SrcForMask = Args.Src;
22772  if (AlignUp) {
22773  // When aligning up we have to first add the mask to ensure we go over the
22774  // next alignment value and then align down to the next valid multiple.
22775  // By adding the mask, we ensure that align_up on an already aligned
22776  // value will not change the value.
22777  if (Args.Src->getType()->isPointerTy()) {
22778  if (getLangOpts().isSignedOverflowDefined())
22779  SrcForMask =
22780  Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22781  else
22782  SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22783  /*SignedIndices=*/true,
22784  /*isSubtraction=*/false,
22785  E->getExprLoc(), "over_boundary");
22786  } else {
22787  SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22788  }
22789  }
22790  // Invert the mask to only clear the lower bits.
22791  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22792  llvm::Value *Result = nullptr;
22793  if (Args.Src->getType()->isPointerTy()) {
22794  Result = Builder.CreateIntrinsic(
22795  Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22796  {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22797  } else {
22798  Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22799  }
22800  assert(Result->getType() == Args.SrcType);
22801  return RValue::get(Result);
22802 }
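// A minimal sketch for the pointer case: aligning up with
//   void *P = __builtin_align_up(Ptr, 32);
// emits roughly (assuming the checked-GEP path is not taken)
//   %over_boundary  = getelementptr i8, ptr %Ptr, i64 31
//   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary,
//                                                   i64 -32)   ; ~(32 - 1)
// which avoids the ptrtoint/inttoptr pair mentioned above.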
22803 
22804 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22805  const CallExpr *E) {
22806  switch (BuiltinID) {
22807  case WebAssembly::BI__builtin_wasm_memory_size: {
22808  llvm::Type *ResultType = ConvertType(E->getType());
22809  Value *I = EmitScalarExpr(E->getArg(0));
22810  Function *Callee =
22811  CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22812  return Builder.CreateCall(Callee, I);
22813  }
22814  case WebAssembly::BI__builtin_wasm_memory_grow: {
22815  llvm::Type *ResultType = ConvertType(E->getType());
22816  Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22817  EmitScalarExpr(E->getArg(1))};
22818  Function *Callee =
22819  CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22820  return Builder.CreateCall(Callee, Args);
22821  }
22822  case WebAssembly::BI__builtin_wasm_tls_size: {
22823  llvm::Type *ResultType = ConvertType(E->getType());
22824  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22825  return Builder.CreateCall(Callee);
22826  }
22827  case WebAssembly::BI__builtin_wasm_tls_align: {
22828  llvm::Type *ResultType = ConvertType(E->getType());
22829  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22830  return Builder.CreateCall(Callee);
22831  }
22832  case WebAssembly::BI__builtin_wasm_tls_base: {
22833  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22834  return Builder.CreateCall(Callee);
22835  }
22836  case WebAssembly::BI__builtin_wasm_throw: {
22837  Value *Tag = EmitScalarExpr(E->getArg(0));
22838  Value *Obj = EmitScalarExpr(E->getArg(1));
22839  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22840  return Builder.CreateCall(Callee, {Tag, Obj});
22841  }
22842  case WebAssembly::BI__builtin_wasm_rethrow: {
22843  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22844  return Builder.CreateCall(Callee);
22845  }
22846  case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22847  Value *Addr = EmitScalarExpr(E->getArg(0));
22848  Value *Expected = EmitScalarExpr(E->getArg(1));
22849  Value *Timeout = EmitScalarExpr(E->getArg(2));
22850  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22851  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22852  }
22853  case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22854  Value *Addr = EmitScalarExpr(E->getArg(0));
22855  Value *Expected = EmitScalarExpr(E->getArg(1));
22856  Value *Timeout = EmitScalarExpr(E->getArg(2));
22857  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22858  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22859  }
22860  case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22861  Value *Addr = EmitScalarExpr(E->getArg(0));
22862  Value *Count = EmitScalarExpr(E->getArg(1));
22863  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22864  return Builder.CreateCall(Callee, {Addr, Count});
22865  }
22866  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22867  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22868  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22869  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22870  Value *Src = EmitScalarExpr(E->getArg(0));
22871  llvm::Type *ResT = ConvertType(E->getType());
22872  Function *Callee =
22873  CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22874  return Builder.CreateCall(Callee, {Src});
22875  }
22876  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22877  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22878  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22879  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22880  Value *Src = EmitScalarExpr(E->getArg(0));
22881  llvm::Type *ResT = ConvertType(E->getType());
22882  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22883  {ResT, Src->getType()});
22884  return Builder.CreateCall(Callee, {Src});
22885  }
22886  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22887  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22888  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22889  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22890  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22891  Value *Src = EmitScalarExpr(E->getArg(0));
22892  llvm::Type *ResT = ConvertType(E->getType());
22893  Function *Callee =
22894  CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22895  return Builder.CreateCall(Callee, {Src});
22896  }
22897  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22898  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22899  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22900  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22901  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22902  Value *Src = EmitScalarExpr(E->getArg(0));
22903  llvm::Type *ResT = ConvertType(E->getType());
22904  Function *Callee =
22905  CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22906  return Builder.CreateCall(Callee, {Src});
22907  }
22908  case WebAssembly::BI__builtin_wasm_min_f32:
22909  case WebAssembly::BI__builtin_wasm_min_f64:
22910  case WebAssembly::BI__builtin_wasm_min_f32x4:
22911  case WebAssembly::BI__builtin_wasm_min_f64x2: {
22912  Value *LHS = EmitScalarExpr(E->getArg(0));
22913  Value *RHS = EmitScalarExpr(E->getArg(1));
22914  Function *Callee =
22915  CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22916  return Builder.CreateCall(Callee, {LHS, RHS});
22917  }
22918  case WebAssembly::BI__builtin_wasm_max_f32:
22919  case WebAssembly::BI__builtin_wasm_max_f64:
22920  case WebAssembly::BI__builtin_wasm_max_f32x4:
22921  case WebAssembly::BI__builtin_wasm_max_f64x2: {
22922  Value *LHS = EmitScalarExpr(E->getArg(0));
22923  Value *RHS = EmitScalarExpr(E->getArg(1));
22924  Function *Callee =
22925  CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22926  return Builder.CreateCall(Callee, {LHS, RHS});
22927  }
22928  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22929  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22930  Value *LHS = EmitScalarExpr(E->getArg(0));
22931  Value *RHS = EmitScalarExpr(E->getArg(1));
22932  Function *Callee =
22933  CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22934  return Builder.CreateCall(Callee, {LHS, RHS});
22935  }
22936  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22937  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22938  Value *LHS = EmitScalarExpr(E->getArg(0));
22939  Value *RHS = EmitScalarExpr(E->getArg(1));
22940  Function *Callee =
22941  CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22942  return Builder.CreateCall(Callee, {LHS, RHS});
22943  }
22944  case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22945  case WebAssembly::BI__builtin_wasm_floor_f32x4:
22946  case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22947  case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22948  case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22949  case WebAssembly::BI__builtin_wasm_floor_f64x2:
22950  case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22951  case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22952  unsigned IntNo;
22953  switch (BuiltinID) {
22954  case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22955  case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22956  IntNo = Intrinsic::ceil;
22957  break;
22958  case WebAssembly::BI__builtin_wasm_floor_f32x4:
22959  case WebAssembly::BI__builtin_wasm_floor_f64x2:
22960  IntNo = Intrinsic::floor;
22961  break;
22962  case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22963  case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22964  IntNo = Intrinsic::trunc;
22965  break;
22966  case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22967  case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22968  IntNo = Intrinsic::nearbyint;
22969  break;
22970  default:
22971  llvm_unreachable("unexpected builtin ID");
22972  }
22973  Value *Value = EmitScalarExpr(E->getArg(0));
22974  Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22975  return Builder.CreateCall(Callee, Value);
22976  }
22977  case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22978  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22979  return Builder.CreateCall(Callee);
22980  }
22981  case WebAssembly::BI__builtin_wasm_ref_null_func: {
22982  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22983  return Builder.CreateCall(Callee);
22984  }
22985  case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22986  Value *Src = EmitScalarExpr(E->getArg(0));
22987  Value *Indices = EmitScalarExpr(E->getArg(1));
22988  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22989  return Builder.CreateCall(Callee, {Src, Indices});
22990  }
22991  case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
22992  case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
22993  case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
22994  case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
22995  case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
22996  case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
22997  case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
22998  case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
22999  unsigned IntNo;
23000  switch (BuiltinID) {
23001  case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
23002  case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
23003  IntNo = Intrinsic::sadd_sat;
23004  break;
23005  case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
23006  case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
23007  IntNo = Intrinsic::uadd_sat;
23008  break;
23009  case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
23010  case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
23011  IntNo = Intrinsic::wasm_sub_sat_signed;
23012  break;
23013  case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
23014  case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
23015  IntNo = Intrinsic::wasm_sub_sat_unsigned;
23016  break;
23017  default:
23018  llvm_unreachable("unexpected builtin ID");
23019  }
23020  Value *LHS = EmitScalarExpr(E->getArg(0));
23021  Value *RHS = EmitScalarExpr(E->getArg(1));
23022  Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
23023  return Builder.CreateCall(Callee, {LHS, RHS});
23024  }
23025  case WebAssembly::BI__builtin_wasm_abs_i8x16:
23026  case WebAssembly::BI__builtin_wasm_abs_i16x8:
23027  case WebAssembly::BI__builtin_wasm_abs_i32x4:
23028  case WebAssembly::BI__builtin_wasm_abs_i64x2: {
23029  Value *Vec = EmitScalarExpr(E->getArg(0));
23030  Value *Neg = Builder.CreateNeg(Vec, "neg");
23031  Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
23032  Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
23033  return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
23034  }
23035  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
23036  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
23037  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
23038  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
23039  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
23040  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
23041  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
23042  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
23043  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
23044  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
23045  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
23046  case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
23047  Value *LHS = EmitScalarExpr(E->getArg(0));
23048  Value *RHS = EmitScalarExpr(E->getArg(1));
23049  Value *ICmp;
23050  switch (BuiltinID) {
23051  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
23052  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
23053  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
23054  ICmp = Builder.CreateICmpSLT(LHS, RHS);
23055  break;
23056  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
23057  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
23058  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
23059  ICmp = Builder.CreateICmpULT(LHS, RHS);
23060  break;
23061  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
23062  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
23063  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
23064  ICmp = Builder.CreateICmpSGT(LHS, RHS);
23065  break;
23066  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
23067  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
23068  case WebAssembly::BI__builtin_wasm_max_u_i32x4:
23069  ICmp = Builder.CreateICmpUGT(LHS, RHS);
23070  break;
23071  default:
23072  llvm_unreachable("unexpected builtin ID");
23073  }
23074  return Builder.CreateSelect(ICmp, LHS, RHS);
23075  }
23076  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
23077  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
23078  Value *LHS = EmitScalarExpr(E->getArg(0));
23079  Value *RHS = EmitScalarExpr(E->getArg(1));
23080  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
23081  ConvertType(E->getType()));
23082  return Builder.CreateCall(Callee, {LHS, RHS});
23083  }
23084  case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
23085  Value *LHS = EmitScalarExpr(E->getArg(0));
23086  Value *RHS = EmitScalarExpr(E->getArg(1));
23087  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
23088  return Builder.CreateCall(Callee, {LHS, RHS});
23089  }
23090  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
23091  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
23092  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
23093  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
23094  Value *Vec = EmitScalarExpr(E->getArg(0));
23095  unsigned IntNo;
23096  switch (BuiltinID) {
23097  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
23098  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
23099  IntNo = Intrinsic::wasm_extadd_pairwise_signed;
23100  break;
23101  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
23102  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
23103  IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
23104  break;
23105  default:
23106  llvm_unreachable("unexpected builtin ID");
23107  }
23108 
23109  Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
23110  return Builder.CreateCall(Callee, Vec);
23111  }
23112  case WebAssembly::BI__builtin_wasm_bitselect: {
23113  Value *V1 = EmitScalarExpr(E->getArg(0));
23114  Value *V2 = EmitScalarExpr(E->getArg(1));
23115  Value *C = EmitScalarExpr(E->getArg(2));
23116  Function *Callee =
23117  CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
23118  return Builder.CreateCall(Callee, {V1, V2, C});
23119  }
23120  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
23121  Value *LHS = EmitScalarExpr(E->getArg(0));
23122  Value *RHS = EmitScalarExpr(E->getArg(1));
23123  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
23124  return Builder.CreateCall(Callee, {LHS, RHS});
23125  }
23126  case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
23127  Value *Vec = EmitScalarExpr(E->getArg(0));
23128  Function *Callee =
23129  CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
23130  return Builder.CreateCall(Callee, {Vec});
23131  }
23132  case WebAssembly::BI__builtin_wasm_any_true_v128:
23133  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
23134  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
23135  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
23136  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
23137  unsigned IntNo;
23138  switch (BuiltinID) {
23139  case WebAssembly::BI__builtin_wasm_any_true_v128:
23140  IntNo = Intrinsic::wasm_anytrue;
23141  break;
23142  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
23143  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
23144  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
23145  case WebAssembly::BI__builtin_wasm_all_true_i64x2:
23146  IntNo = Intrinsic::wasm_alltrue;
23147  break;
23148  default:
23149  llvm_unreachable("unexpected builtin ID");
23150  }
23151  Value *Vec = EmitScalarExpr(E->getArg(0));
23152  Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
23153  return Builder.CreateCall(Callee, {Vec});
23154  }
23155  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
23156  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
23157  case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
23158  case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
23159  Value *Vec = EmitScalarExpr(E->getArg(0));
23160  Function *Callee =
23161  CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
23162  return Builder.CreateCall(Callee, {Vec});
23163  }
23164  case WebAssembly::BI__builtin_wasm_abs_f32x4:
23165  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
23166  Value *Vec = EmitScalarExpr(E->getArg(0));
23167  Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
23168  return Builder.CreateCall(Callee, {Vec});
23169  }
23170  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
23171  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
23172  Value *Vec = EmitScalarExpr(E->getArg(0));
23173  Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
23174  return Builder.CreateCall(Callee, {Vec});
23175  }
23176  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
23177  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
23178  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
23179  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
23180  Value *Low = EmitScalarExpr(E->getArg(0));
23181  Value *High = EmitScalarExpr(E->getArg(1));
23182  unsigned IntNo;
23183  switch (BuiltinID) {
23184  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
23185  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
23186  IntNo = Intrinsic::wasm_narrow_signed;
23187  break;
23188  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
23189  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
23190  IntNo = Intrinsic::wasm_narrow_unsigned;
23191  break;
23192  default:
23193  llvm_unreachable("unexpected builtin ID");
23194  }
23195  Function *Callee =
23196  CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
23197  return Builder.CreateCall(Callee, {Low, High});
23198  }
23199  case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
23200  case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
23201  Value *Vec = EmitScalarExpr(E->getArg(0));
23202  unsigned IntNo;
23203  switch (BuiltinID) {
23204  case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
23205  IntNo = Intrinsic::fptosi_sat;
23206  break;
23207  case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
23208  IntNo = Intrinsic::fptoui_sat;
23209  break;
23210  default:
23211  llvm_unreachable("unexpected builtin ID");
23212  }
23213  llvm::Type *SrcT = Vec->getType();
23214  llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
23215  Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
23216  Value *Trunc = Builder.CreateCall(Callee, Vec);
23217  Value *Splat = Constant::getNullValue(TruncT);
23218  return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
23219  }
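// Sketch of the zero-extending truncation above: the <2 x double> input is
// saturating-converted to <2 x i32> (@llvm.fptosi.sat / @llvm.fptoui.sat)
// and then shuffled with a zero vector, so lanes 2 and 3 of the resulting
// <4 x i32> are guaranteed to be zero.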
23220  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
23221  Value *Ops[18];
23222  size_t OpIdx = 0;
23223  Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
23224  Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
23225  while (OpIdx < 18) {
23226  std::optional<llvm::APSInt> LaneConst =
23227  E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
23228  assert(LaneConst && "Constant arg isn't actually constant?");
23229  Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
23230  }
23231  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
23232  return Builder.CreateCall(Callee, Ops);
23233  }
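// Note: the 16 lane arguments of __builtin_wasm_shuffle_i8x16 are required
// to be integer constant expressions (Sema enforces this; the assert above
// double-checks). A typical call looks like
//   __builtin_wasm_shuffle_i8x16(a, b, 0, 1, 2, 3, 4, 5, 6, 7,
//                                8, 9, 10, 11, 12, 13, 14, 15);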
23234  case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
23235  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
23236  case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
23237  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
23238  Value *A = EmitScalarExpr(E->getArg(0));
23239  Value *B = EmitScalarExpr(E->getArg(1));
23240  Value *C = EmitScalarExpr(E->getArg(2));
23241  unsigned IntNo;
23242  switch (BuiltinID) {
23243  case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
23244  case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
23245  IntNo = Intrinsic::wasm_relaxed_madd;
23246  break;
23247  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
23248  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
23249  IntNo = Intrinsic::wasm_relaxed_nmadd;
23250  break;
23251  default:
23252  llvm_unreachable("unexpected builtin ID");
23253  }
23254  Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
23255  return Builder.CreateCall(Callee, {A, B, C});
23256  }
23257  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
23258  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
23259  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
23260  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
23261  Value *A = EmitScalarExpr(E->getArg(0));
23262  Value *B = EmitScalarExpr(E->getArg(1));
23263  Value *C = EmitScalarExpr(E->getArg(2));
23264  Function *Callee =
23265  CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
23266  return Builder.CreateCall(Callee, {A, B, C});
23267  }
23268  case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
23269  Value *Src = EmitScalarExpr(E->getArg(0));
23270  Value *Indices = EmitScalarExpr(E->getArg(1));
23271  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
23272  return Builder.CreateCall(Callee, {Src, Indices});
23273  }
23274  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
23275  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
23276  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
23277  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
23278  Value *LHS = EmitScalarExpr(E->getArg(0));
23279  Value *RHS = EmitScalarExpr(E->getArg(1));
23280  unsigned IntNo;
23281  switch (BuiltinID) {
23282  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
23283  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
23284  IntNo = Intrinsic::wasm_relaxed_min;
23285  break;
23286  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
23287  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
23288  IntNo = Intrinsic::wasm_relaxed_max;
23289  break;
23290  default:
23291  llvm_unreachable("unexpected builtin ID");
23292  }
23293  Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
23294  return Builder.CreateCall(Callee, {LHS, RHS});
23295  }
23296  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
23297  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
23298  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
23299  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
23300  Value *Vec = EmitScalarExpr(E->getArg(0));
23301  unsigned IntNo;
23302  switch (BuiltinID) {
23303  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
23304  IntNo = Intrinsic::wasm_relaxed_trunc_signed;
23305  break;
23306  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
23307  IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
23308  break;
23309  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
23310  IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
23311  break;
23312  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
23313  IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
23314  break;
23315  default:
23316  llvm_unreachable("unexpected builtin ID");
23317  }
23318  Function *Callee = CGM.getIntrinsic(IntNo);
23319  return Builder.CreateCall(Callee, {Vec});
23320  }
23321  case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
23322  Value *LHS = EmitScalarExpr(E->getArg(0));
23323  Value *RHS = EmitScalarExpr(E->getArg(1));
23324  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
23325  return Builder.CreateCall(Callee, {LHS, RHS});
23326  }
23327  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
23328  Value *LHS = EmitScalarExpr(E->getArg(0));
23329  Value *RHS = EmitScalarExpr(E->getArg(1));
23330  Function *Callee =
23331  CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
23332  return Builder.CreateCall(Callee, {LHS, RHS});
23333  }
23334  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
23335  Value *LHS = EmitScalarExpr(E->getArg(0));
23336  Value *RHS = EmitScalarExpr(E->getArg(1));
23337  Value *Acc = EmitScalarExpr(E->getArg(2));
23338  Function *Callee =
23339  CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
23340  return Builder.CreateCall(Callee, {LHS, RHS, Acc});
23341  }
23342  case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
23343  Value *LHS = EmitScalarExpr(E->getArg(0));
23344  Value *RHS = EmitScalarExpr(E->getArg(1));
23345  Value *Acc = EmitScalarExpr(E->getArg(2));
23346  Function *Callee =
23347  CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
23348  return Builder.CreateCall(Callee, {LHS, RHS, Acc});
23349  }
23350  case WebAssembly::BI__builtin_wasm_table_get: {
23351  assert(E->getArg(0)->getType()->isArrayType());
23352  Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23353  Value *Index = EmitScalarExpr(E->getArg(1));
23354  Function *Callee;
23355  if (E->getType().isWebAssemblyExternrefType())
23356  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
23357  else if (E->getType().isWebAssemblyFuncrefType())
23358  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
23359  else
23360  llvm_unreachable(
23361  "Unexpected reference type for __builtin_wasm_table_get");
23362  return Builder.CreateCall(Callee, {Table, Index});
23363  }
23364  case WebAssembly::BI__builtin_wasm_table_set: {
23365  assert(E->getArg(0)->getType()->isArrayType());
23366  Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23367  Value *Index = EmitScalarExpr(E->getArg(1));
23368  Value *Val = EmitScalarExpr(E->getArg(2));
23369  Function *Callee;
23370  if (E->getArg(2)->getType().isWebAssemblyExternrefType())
23371  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
23372  else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
23373  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
23374  else
23375  llvm_unreachable(
23376  "Unexpected reference type for __builtin_wasm_table_set");
23377  return Builder.CreateCall(Callee, {Table, Index, Val});
23378  }
23379  case WebAssembly::BI__builtin_wasm_table_size: {
23380  assert(E->getArg(0)->getType()->isArrayType());
23381  Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23382  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
23383  return Builder.CreateCall(Callee, Value);
23384  }
23385  case WebAssembly::BI__builtin_wasm_table_grow: {
23386  assert(E->getArg(0)->getType()->isArrayType());
23387  Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23388  Value *Val = EmitScalarExpr(E->getArg(1));
23389  Value *NElems = EmitScalarExpr(E->getArg(2));
23390 
23391  Function *Callee;
23392  if (E->getArg(1)->getType().isWebAssemblyExternrefType())
23393  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
23394  else if (E->getArg(1)->getType().isWebAssemblyFuncrefType())
23395  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
23396  else
23397  llvm_unreachable(
23398  "Unexpected reference type for __builtin_wasm_table_grow");
23399 
23400  return Builder.CreateCall(Callee, {Table, Val, NElems});
23401  }
23402  case WebAssembly::BI__builtin_wasm_table_fill: {
23403  assert(E->getArg(0)->getType()->isArrayType());
23404  Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23405  Value *Index = EmitScalarExpr(E->getArg(1));
23406  Value *Val = EmitScalarExpr(E->getArg(2));
23407  Value *NElems = EmitScalarExpr(E->getArg(3));
23408 
23409  Function *Callee;
23410  if (E->getArg(2)->getType().isWebAssemblyExternrefType())
23411  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
23412  else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
23413  Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
23414  else
23415  llvm_unreachable(
23416  "Unexpected reference type for __builtin_wasm_table_fill");
23417 
23418  return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
23419  }
23420  case WebAssembly::BI__builtin_wasm_table_copy: {
23421  assert(E->getArg(0)->getType()->isArrayType());
23422  Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
23423  Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
23424  Value *DstIdx = EmitScalarExpr(E->getArg(2));
23425  Value *SrcIdx = EmitScalarExpr(E->getArg(3));
23426  Value *NElems = EmitScalarExpr(E->getArg(4));
23427 
23428  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
23429 
23430  return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
23431  }
23432  default:
23433  return nullptr;
23434  }
23435 }
23436 
23437 static std::pair<Intrinsic::ID, unsigned>
23438 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
23439  struct Info {
23440  unsigned BuiltinID;
23441  Intrinsic::ID IntrinsicID;
23442  unsigned VecLen;
23443  };
23444  static Info Infos[] = {
23445 #define CUSTOM_BUILTIN_MAPPING(x,s) \
23446  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
23447  CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
23448  CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
23449  CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
23450  CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
23451  CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
23452  CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
23453  CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
23454  CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
23455  CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
23456  CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
23457  CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
23458  CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
23459  CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
23460  CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
23461  CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
23462  CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
23463  CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
23464  CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
23465  CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
23466  CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
23467  CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
23468  CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
23469  // Legacy builtins that take a vector in place of a vector predicate.
23470  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
23471  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
23472  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
23473  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
23474  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
23475  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
23476  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
23477  CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
23478 #include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
23479 #undef CUSTOM_BUILTIN_MAPPING
23480  };
23481 
23482  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
23483  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
23484  (void)SortOnce;
23485 
23486  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
23487  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
23488  return {Intrinsic::not_intrinsic, 0};
23489 
23490  return {F->IntrinsicID, F->VecLen};
23491 }
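// The lookup above uses a sort-once-then-binary-search pattern: the comma
// expression in the static initializer sorts Infos exactly once, and every
// later call only pays for llvm::lower_bound. In isolation the pattern is:
//   static const bool Once = (llvm::sort(Table, Cmp), true);  // first use
//   const Info *I = llvm::lower_bound(Table, Key, Cmp);
//   // found iff I != std::end(Table) && I->BuiltinID == Key.BuiltinID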
23492 
23493 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
23494  const CallExpr *E) {
23495  Intrinsic::ID ID;
23496  unsigned VecLen;
23497  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
23498 
23499  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
23500  // The base pointer is passed by address, so it needs to be loaded.
23501  Address A = EmitPointerWithAlignment(E->getArg(0));
23502  Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
23503  llvm::Value *Base = Builder.CreateLoad(BP);
23504  // The treatment of both loads and stores is the same: the arguments for
23505  // the builtin are the same as the arguments for the intrinsic.
23506  // Load:
23507  // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
23508  // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
23509  // Store:
23510  // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
23511  // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
23512  SmallVector<llvm::Value*,5> Ops = { Base };
23513  for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
23514  Ops.push_back(EmitScalarExpr(E->getArg(i)));
23515 
23516  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
23517  // The load intrinsics generate two results (Value, NewBase), stores
23518  // generate one (NewBase). The new base address needs to be stored.
23519  llvm::Value *NewBase =
23520  IsLoad ? Builder.CreateExtractValue(Result, 1) : Result;
23521  llvm::Value *LV = EmitScalarExpr(E->getArg(0));
23522  Address Dest = EmitPointerWithAlignment(E->getArg(0));
23523  llvm::Value *RetVal =
23524  Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
23525  if (IsLoad)
23526  RetVal = Builder.CreateExtractValue(Result, 0);
23527  return RetVal;
23528  };
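// Sketch of a circular-load expansion: for something like
//   int V = __builtin_HEXAGON_L2_loadri_pci(&Base, Inc, Mod, Start);
// the lambda loads the current base from &Base, calls the
// @llvm.hexagon.L2.loadri.pci intrinsic (returning {Value, NewBase}),
// stores NewBase back through &Base, and returns the extracted Value.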
23529 
23530  // Handle the conversion of bit-reverse load intrinsics to bit code.
23531  // The intrinsic call after this function only reads from memory and the
23532  // write to memory is handled by the store instruction.
23533  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
23534  // The intrinsic generates one result, which is the new value for the base
23535  // pointer. It needs to be returned. The result of the load instruction is
23536  // passed to intrinsic by address, so the value needs to be stored.
23537  llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
23538 
23539  // Expressions like &(*pt++) would be incremented on every evaluation.
23540  // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
23541  // only once per call.
23542  Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
23543  DestAddr = DestAddr.withElementType(Int8Ty);
23544  llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
23545 
23546  // Operands are Base, Dest, Modifier.
23547  // The intrinsic format in LLVM IR is defined as
23548  // { ValueType, i8* } (i8*, i32).
23549  llvm::Value *Result = Builder.CreateCall(
23550  CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
23551 
23552  // The value needs to be stored as the variable is passed by reference.
23553  llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
23554 
23555  // The store needs to be truncated to fit the destination type.
23556  // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
23557  // to be handled with stores of the respective destination type.
23558  DestVal = Builder.CreateTrunc(DestVal, DestTy);
23559 
23560  Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
23561  // The updated value of the base pointer is returned.
23562  return Builder.CreateExtractValue(Result, 1);
23563  };
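// Sketch of a bit-reverse load: for a call along the lines of
//   Base = __builtin_brev_ldh(Base, &Dst, Mod);     // illustrative usage
// the intrinsic returns { Value, NewBase }; Value is truncated to i16 and
// stored through &Dst, and NewBase is returned as the updated base pointer.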
23564 
23565  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
23566  Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
23567  : Intrinsic::hexagon_V6_vandvrt;
23568  return Builder.CreateCall(CGM.getIntrinsic(ID),
23569  {Vec, Builder.getInt32(-1)});
23570  };
23571  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
23572  Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
23573  : Intrinsic::hexagon_V6_vandqrt;
23574  return Builder.CreateCall(CGM.getIntrinsic(ID),
23575  {Pred, Builder.getInt32(-1)});
23576  };
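// V2Q/Q2V bridge between the byte-vector type used by the C builtins and
// the HVX predicate type used by the LLVM intrinsics: vandvrt "ands" a
// vector against an all-ones scalar to form a predicate, and vandqrt
// expands a predicate back into a vector, e.g. (64-byte mode, sketch):
//   %q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)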
23577 
23578  switch (BuiltinID) {
23579  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
23580  // and the corresponding C/C++ builtins use loads/stores to update
23581  // the predicate.
23582  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
23583  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
23584  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
23585  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
23586  // Get the type from the 0-th argument.
23587  llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
23588  Address PredAddr =
23589  EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
23590  llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
23591  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
23592  {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
23593 
23594  llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
23595  Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
23596  PredAddr.getAlignment());
23597  return Builder.CreateExtractValue(Result, 0);
23598  }
23599  // These are identical to the builtins above, except they don't consume
23600  // input carry, only generate carry-out. Since they still produce two
23601  // outputs, generate the store of the predicate, but no load.
23602  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
23603  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
23604  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
23605  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
23606  // Get the type from the 0-th argument.
23607  llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
23608  Address PredAddr =
23609  EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
23610  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
23611  {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
23612 
23613  llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
23614  Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
23615  PredAddr.getAlignment());
23616  return Builder.CreateExtractValue(Result, 0);
23617  }
23618 
23619  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
23620  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
23621  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
23622  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
23623  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
23624  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
23625  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
23626  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
23627  SmallVector<llvm::Value*,4> Ops;
23628  const Expr *PredOp = E->getArg(0);
23629  // There will be an implicit cast to a boolean vector. Strip it.
23630  if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
23631  if (Cast->getCastKind() == CK_BitCast)
23632  PredOp = Cast->getSubExpr();
23633  Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
23634  }
23635  for (int i = 1, e = E->getNumArgs(); i != e; ++i)
23636  Ops.push_back(EmitScalarExpr(E->getArg(i)));
23637  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
23638  }
23639 
23640  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
23641  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
23642  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
23643  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
23644  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
23645  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
23646  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
23647  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
23648  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
23649  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
23650  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
23651  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
23652  return MakeCircOp(ID, /*IsLoad=*/true);
23653  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
23654  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
23655  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
23656  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
23657  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
23658  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
23659  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
23660  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
23661  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
23662  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
23663  return MakeCircOp(ID, /*IsLoad=*/false);
23664  case Hexagon::BI__builtin_brev_ldub:
23665  return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
23666  case Hexagon::BI__builtin_brev_ldb:
23667  return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
23668  case Hexagon::BI__builtin_brev_lduh:
23669  return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
23670  case Hexagon::BI__builtin_brev_ldh:
23671  return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
23672  case Hexagon::BI__builtin_brev_ldw:
23673  return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
23674  case Hexagon::BI__builtin_brev_ldd:
23675  return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
23676  } // switch
23677 
23678  return nullptr;
23679 }
23680 
23681 RValue CodeGenFunction::EmitIntelFPGARegBuiltin(const CallExpr *E,
23682  ReturnValueSlot ReturnValue) {
23683  const Expr *PtrArg = E->getArg(0);
23684  QualType ArgType = PtrArg->getType();
23685  StringRef AnnotStr = "__builtin_intel_fpga_reg";
23686 
23687  if (ArgType->isRecordType()) {
23688  Address DstAddr = ReturnValue.getValue();
23689  EmitAnyExprToMem(PtrArg, DstAddr, ArgType.getQualifiers(), true);
23690  Address A =
23691  EmitIntelFPGAFieldAnnotations(E->getExprLoc(), DstAddr, AnnotStr);
23692  return RValue::getAggregate(A);
23693  }
23694 
23695  // Otherwise, handle scalar types.
23696  llvm::Value *V = EmitScalarExpr(PtrArg);
23697 
23698  // llvm.annotation does not accept anything but integer types.
23699  llvm::Type *OrigVType = V->getType();
23700  if (!OrigVType->isIntegerTy()) {
23701  IntegerType *IntTy =
23702  Builder.getIntNTy(CGM.getDataLayout().getTypeSizeInBits(OrigVType));
23703  V = Builder.CreateBitOrPointerCast(V, IntTy);
23704  }
23705  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
23706  {V->getType(), CGM.ConstGlobalsPtrTy});
23707  llvm::Value *AnnotatedV =
23708  EmitAnnotationCall(F, V, AnnotStr, E->getExprLoc());
23709 
23710  if (AnnotatedV->getType() != OrigVType) {
23711  AnnotatedV = Builder.CreateBitOrPointerCast(AnnotatedV, OrigVType);
23712  }
23713 
23714  return RValue::get(AnnotatedV);
23715 }
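// Sketch of the scalar path: for
//   float R = __builtin_intel_fpga_reg(F);
// F is bitcast to i32, passed through @llvm.annotation with the
// "__builtin_intel_fpga_reg" string, and bitcast back to float; record
// types instead take the aggregate path at the top of the function.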
23716 
23717 RValue CodeGenFunction::EmitIntelFPGAMemBuiltin(const CallExpr *E) {
23718  // Arguments
23719  const Expr *PtrArg = E->getArg(0);
23720  Value *PtrVal = EmitScalarExpr(PtrArg);
23721  ASTContext &Ctx = getContext();
23722 
23723  // Create the pointer annotation
23724  Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation,
23725  {PtrVal->getType(), CGM.ConstGlobalsPtrTy});
23726  SmallString<256> AnnotStr;
23727  llvm::raw_svector_ostream Out(AnnotStr);
23728 
23729  auto AddArgValue = [&E, &Ctx, &Out](unsigned NumOfArg, StringRef StringToAdd,
23730  int DefaultValue = INT_MIN) {
23731  std::optional<llvm::APSInt> IntVal =
23732  (E->getNumArgs() > NumOfArg)
23733  ? E->getArg(NumOfArg)->getIntegerConstantExpr(Ctx)
23734  : APSInt::get(DefaultValue);
23735  assert(IntVal.has_value() && "Constant arg isn't actually constant?");
23736  Out << "{" << StringToAdd << ":" << toString(*IntVal, 10) << "}";
23737  };
23738 
23739  AddArgValue(1, "params");
23740  AddArgValue(2, "cache-size");
23741  // There are four optional arguments with the following default values:
23742  // const int32_t AnchorID = -1
23743  // const int32_t TargetAnchor = 0
23744  // const int32_t Type = 0
23745  // const int32_t Cycle = 0
23746  // Emit default values or use provided.
23747  AddArgValue(3, "anchor-id", -1);
23748  AddArgValue(4, "target-anchor", 0);
23749  AddArgValue(5, "type", 0);
23750  AddArgValue(6, "cycle", 0);
23751 
23752  llvm::Value *Ann = EmitAnnotationCall(F, PtrVal, AnnotStr, SourceLocation());
23753 
23754  cast<CallBase>(Ann)->setDoesNotAccessMemory();
23755 
23756  return RValue::get(Ann);
23757 }
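// The annotation string is a concatenation of "{key:value}" pairs, with
// defaults filled in for the optional arguments, e.g. (illustrative values)
//   "{params:1}{cache-size:0}{anchor-id:-1}{target-anchor:0}{type:0}{cycle:0}"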
23758 
23759 static bool hasFuncNameRequestedFPAccuracy(StringRef Name,
23760  const LangOptions &LangOpts) {
23761  auto FuncMapIt = LangOpts.FPAccuracyFuncMap.find(Name.str());
23762  return (FuncMapIt != LangOpts.FPAccuracyFuncMap.end());
23763 }
23764 
23765 llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall(
23766  llvm::FunctionType *IRFuncTy, const SmallVectorImpl<llvm::Value *> &IRArgs,
23767  llvm::Value *FnPtr, StringRef Name, unsigned FDBuiltinID) {
23768  unsigned FPAccuracyIntrinsicID = 0;
23769  if (FDBuiltinID == 0) {
23770  FPAccuracyIntrinsicID =
23771  llvm::StringSwitch<unsigned>(Name)
23772  .Case("fadd", llvm::Intrinsic::fpbuiltin_fadd)
23773  .Case("fdiv", llvm::Intrinsic::fpbuiltin_fdiv)
23774  .Case("fmul", llvm::Intrinsic::fpbuiltin_fmul)
23775  .Case("fsub", llvm::Intrinsic::fpbuiltin_fsub)
23776  .Case("frem", llvm::Intrinsic::fpbuiltin_frem)
23777  .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
23778  .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
23779  .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
23780  .Default(0);
23781  } else {
23782  // The function has a clang builtin. Create an attribute for it
23783  // only if it has an fpbuiltin intrinsic.
23784  switch (FDBuiltinID) {
23785  default:
23786  // If the function has a clang builtin but doesn't have an
23787  // fpbuiltin, it will be generated with no 'fpbuiltin-max-error'
23788  // attribute.
23789  return nullptr;
23790  case Builtin::BItan:
23791  case Builtin::BItanf:
23792  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_tan;
23793  break;
23794  case Builtin::BItanh:
23795  case Builtin::BItanhf:
23796  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_tanh;
23797  break;
23798  case Builtin::BIlog2:
23799  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_log2;
23800  break;
23801  case Builtin::BIlog1p:
23802  case Builtin::BIlog1pf:
23803  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_log1p;
23804  break;
23805  case Builtin::BIcos:
23806  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_cos;
23807  break;
23808  case Builtin::BIcosh:
23809  case Builtin::BIcoshf:
23810  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_cosh;
23811  break;
23812  case Builtin::BIacos:
23813  case Builtin::BIacosf:
23814  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_acos;
23815  break;
23816  case Builtin::BIacosh:
23817  case Builtin::BIacoshf:
23818  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_acosh;
23819  break;
23820  case Builtin::BIsin:
23821  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_sin;
23822  break;
23823  case Builtin::BIsinh:
23824  case Builtin::BIsinhf:
23825  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_sinh;
23826  break;
23827  case Builtin::BIasin:
23828  case Builtin::BIasinf:
23829  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_asin;
23830  break;
23831  case Builtin::BIasinh:
23832  case Builtin::BIasinhf:
23833  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_asinh;
23834  break;
23835  case Builtin::BIatan:
23836  case Builtin::BIatanf:
23837  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atan;
23838  break;
23839  case Builtin::BIatanh:
23840  case Builtin::BIatanhf:
23841  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atanh;
23842  break;
23843  case Builtin::BIatan2:
23844  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atan2;
23845  break;
23846  case Builtin::BIerf:
23847  case Builtin::BIerff:
23848  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_erf;
23849  break;
23850  case Builtin::BIerfc:
23851  case Builtin::BIerfcf:
23852  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_erfc;
23853  break;
23854  case Builtin::BIexp:
23855  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_exp;
23856  break;
23857  case Builtin::BIexp2:
23858  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_exp2;
23859  break;
23860  case Builtin::BIexpm1:
23861  case Builtin::BIexpm1f:
23862  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_expm1;
23863  break;
23864  case Builtin::BIhypot:
23865  case Builtin::BIhypotf:
23866  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_hypot;
23867  break;
23868  case Builtin::BIldexp:
23869  case Builtin::BIldexpf:
23870  FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_ldexp;
23871  break;
23872  }
23873  }
23874  if (!FPAccuracyIntrinsicID)
23875  return nullptr;
23876 
23877  // Create an intrinsic only if the function name exists in the map, or
23878  // if an fp-accuracy value was requested for the whole translation unit.
23879  const LangOptions &LangOpts = getLangOpts();
23880  if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
23881  !LangOpts.FPAccuracyVal.empty()) {
23882  llvm::Function *Func =
23883  CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
23884  return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
23885  FPAccuracyIntrinsicID);
23886  }
23887  return nullptr;
23888 }
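// Sketch: when an fp-accuracy level is requested (per function or for the
// whole TU via -ffp-accuracy), an indirect call to, say, "fdiv" is emitted
// through the matching fpbuiltin intrinsic, roughly
//   %r = call double @llvm.fpbuiltin.fdiv.f64(double %a, double %b)
// with CreateBuiltinCallWithAttr attaching the max-error attribute; without
// a matching intrinsic, the call is emitted unchanged.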
23889 
23890 RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) {
23891  const Expr *PtrArg = E->getArg(0);
23892  Value *PtrVal = EmitScalarExpr(PtrArg);
23893  auto &Ctx = CGM.getContext();
23894 
23895  // Create the pointer annotation.
23896  Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation,
23897  {PtrVal->getType(), CGM.ConstGlobalsPtrTy});
23898 
23899  SmallString<256> AnnotStr;
23900  llvm::raw_svector_ostream Out(AnnotStr);
23901 
23902  SmallVector<std::pair<std::string, std::string>, 4> Properties;
23903 
23904  for (unsigned I = 1, N = E->getNumArgs(); I <= N / 2; I++) {
23905  auto Arg = E->getArg(I)->IgnoreParenCasts();
23906  const StringLiteral *Str = dyn_cast<const StringLiteral>(Arg);
23907  Expr::EvalResult Result;
23908  if (!Str && Arg->EvaluateAsRValue(Result, Ctx) && Result.Val.isLValue()) {
23909  const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
23910  Str = dyn_cast<const StringLiteral>(LVE);
23911  }
23912  assert(Str && "Constant parameter string is invalid?");
23913 
23914  auto IntVal = E->getArg(I + N / 2)->getIntegerConstantExpr(Ctx);
23915  assert(IntVal.has_value() &&
23916  "Constant integer arg isn't actually constant?");
23917 
23918  Properties.push_back(
23919  std::make_pair(Str->getString().str(), toString(IntVal.value(), 10)));
23920  }
23921 
23922  llvm::Value *Ann =
23923  EmitSYCLAnnotationCall(F, PtrVal, E->getExprLoc(), Properties);
23924  return RValue::get(Ann);
23925 }
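// Sketch of the pairing logic: in a call such as
//   __builtin_intel_sycl_ptr_annotation(Ptr, "A", "B", 1, 2)   // N = 5
// argument I is paired with argument I + N/2, so the @llvm.ptr.annotation
// call carries the properties ("A", "1") and ("B", "2").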
23926 
23927 RValue CodeGenFunction::EmitIntelSYCLAllocaBuiltin(
23928  unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) {
23929  assert((BuiltinID == Builtin::BI__builtin_intel_sycl_alloca ||
23930  BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align) &&
23931  "Unexpected builtin");
23932 
23933  bool IsAlignedAlloca =
23934  BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align;
23935 
23936  constexpr unsigned InvalidIndex = -1;
23937  constexpr unsigned ElementTypeIndex = 0;
23938  const unsigned AlignmentIndex = IsAlignedAlloca ? 1 : InvalidIndex;
23939  const unsigned SpecNameIndex = IsAlignedAlloca ? 2 : 1;
23940  const unsigned DecorateAddressIndex = IsAlignedAlloca ? 3 : 2;
23941 
23942  const FunctionDecl *FD = E->getDirectCallee();
23943  assert(FD && "Expecting direct call to builtin");
23944 
23945  SourceLocation Loc = E->getExprLoc();
23946 
23947  // Get specialization constant ID.
23948  const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
23949  assert(TAL && "Expecting template argument list");
23950  ValueDecl *SpecConst = TAL->get(SpecNameIndex).getAsDecl();
23951  DeclRefExpr *Ref = DeclRefExpr::Create(
23952  getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst,
23953  /*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(),
23954  SpecConst->getType(), ExprValueKind::VK_LValue);
23955  llvm::Value *UID = EmitScalarExpr(
23956  SYCLUniqueStableIdExpr::Create(getContext(), Loc, Loc, Loc, Ref));
23957 
23958  // Get specialization ID pointer.
23959  llvm::Value *SpecConstPtr =
23960  EmitLValue(Ref, clang::CodeGen::KnownNonNull).getPointer(*this);
23961 
23962  // Get specialization constant buffer.
23963  // TODO: When this extension supports more targets, get RTBufferPtr from input
23964  // sycl::kernel_handler &.
23965  llvm::Value *RTBufferPtr = llvm::ConstantPointerNull::get(
23966  cast<llvm::PointerType>(SpecConstPtr->getType()));
23967 
23968  // Get allocation type.
23969  QualType AllocaType = TAL->get(ElementTypeIndex).getAsType();
23970  llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType);
23971  unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace();
23972  llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS);
23973 
23974  llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty);
23975 
23976  llvm::Constant *Align = Builder.getInt64(
23977  IsAlignedAlloca
23978  ? TAL->get(AlignmentIndex).getAsIntegral().getZExtValue()
23979  : getContext().getTypeAlignInChars(AllocaType).getAsAlign().value());
23980 
23981  llvm::Value *Allocation = [&]() {
23982  // To implement automatic storage duration of the underlying memory object,
23983  // insert intrinsic call before `AllocaInsertPt`. These will be lowered to
23984  // an `alloca` or an equivalent construct in later compilation stages.
23985  IRBuilderBase::InsertPointGuard IPG(Builder);
23986  Builder.SetInsertPoint(AllocaInsertPt);
23987  llvm::CallInst *CI = Builder.CreateIntrinsic(
23988  AllocaTy, Intrinsic::sycl_alloca,
23989  {UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca");
23990 
23991  // Propagate function used aspects.
23992  llvm::Function *F = CI->getCalledFunction();
23993  constexpr llvm::StringLiteral MDName = "sycl_used_aspects";
23994  if (!F->getMetadata(MDName)) {
23995  auto *AspectAttr = FD->getAttr<SYCLUsesAspectsAttr>();
23996  assert(AspectAttr && AspectAttr->aspects_size() == 1 &&
23997  "Expecting a single aspect");
23998  llvm::APSInt AspectInt =
23999  (*AspectAttr->aspects_begin())->EvaluateKnownConstInt(getContext());
24000  llvm::Type *I32Ty = Builder.getInt32Ty();
24001  llvm::Constant *C = llvm::Constant::getIntegerValue(I32Ty, AspectInt);
24002  llvm::Metadata *AspectMD = llvm::ConstantAsMetadata::get(C);
24003  F->setMetadata(MDName, llvm::MDNode::get(Builder.getContext(), AspectMD));
24004  }
24005  return CI;
24006  }();
24007 
24008  // Perform AS cast if needed.
24009 
24010  constexpr int NoDecorated = 0;
24011  llvm::APInt Decorated = TAL->get(DecorateAddressIndex).getAsIntegral();
24012  // Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private
24013  // AS) pointer type. Perform cast if 'sycl::access::decorated::no'.
24014  if (Decorated == NoDecorated) {
24015  IRBuilderBase::InsertPointGuard IPG(Builder);
24016  Builder.SetInsertPoint(getPostAllocaInsertPoint());
24017  unsigned DestAddrSpace =
24018  getContext().getTargetAddressSpace(LangAS::Default);
24019  llvm::PointerType *DestTy =
24020  llvm::PointerType::get(Builder.getContext(), DestAddrSpace);
24021  Allocation = Builder.CreateAddrSpaceCast(Allocation, DestTy);
24022  }
24023 
24024  // If no slot is provided, simply return allocation.
24025  if (ReturnValue.isNull())
24026  return RValue::get(Allocation);
24027 
24028  // If a slot is provided, store pointer there.
24029  Builder.CreateStore(Allocation, ReturnValue.getValue());
24030  return RValue::getAggregate(ReturnValue.getValue());
24031 }
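// Sketch: each __builtin_intel_sycl_alloca(_with_align) call becomes
//   %p = call ptr @llvm.sycl.alloca(UID, SpecConstPtr, RTBufferPtr,
//                                   EltTyConst, Align)   ; at AllocaInsertPt
// so the memory gets automatic storage duration, is tagged with
// "sycl_used_aspects" metadata, and is addrspace-cast to the default
// address space when sycl::access::decorated::no is requested.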
24032 
24033 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
24034  const CallExpr *E,
24035  ReturnValueSlot ReturnValue) {
24036  SmallVector<Value *, 4> Ops;
24037  llvm::Type *ResultType = ConvertType(E->getType());
24038 
24039  // Find out if any arguments are required to be integer constant expressions.
24040  unsigned ICEArguments = 0;
24041  ASTContext::GetBuiltinTypeError Error;
24042  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
24043  if (Error == ASTContext::GE_Missing_type) {
24044  // Vector intrinsics don't have a type string.
24045  assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
24046  BuiltinID <= clang::RISCV::LastRVVBuiltin);
24047  ICEArguments = 0;
24048  if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
24049  BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
24050  ICEArguments = 1 << 1;
24051  } else {
24052  assert(Error == ASTContext::GE_None && "Unexpected error");
24053  }
24054 
24055  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
24056  ICEArguments |= (1 << 1);
24057  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
24058  ICEArguments |= (1 << 2);
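  // ICEArguments is a bitmask: bit N set means argument N must remain an
  // integer constant expression, so EmitScalarOrConstFoldImmArg below folds
  // it to a ConstantInt instead of emitting a load of a variable value.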
24059 
24060  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
24061  // Handle aggregate argument, namely RVV tuple types in segment load/store
24062  if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
24063  LValue L = EmitAggExprToLValue(E->getArg(i));
24064  llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
24065  Ops.push_back(AggValue);
24066  continue;
24067  }
24068  Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
24069  }
24070 
24071  Intrinsic::ID ID = Intrinsic::not_intrinsic;
24072  unsigned NF = 1;
24073  // The 0th bit simulates the `vta` of RVV
24074  // The 1st bit simulates the `vma` of RVV
24075  constexpr unsigned RVV_VTA = 0x1;
24076  constexpr unsigned RVV_VMA = 0x2;
24077  int PolicyAttrs = 0;
24078  bool IsMasked = false;
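  // E.g. PolicyAttrs == (RVV_VTA | RVV_VMA) models a tail-agnostic,
  // mask-agnostic operation; the generated cases in the included .inc files
  // set PolicyAttrs/IsMasked per builtin before the intrinsic call is formed.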
24079 
24080  // Required for overloaded intrinsics.
24081  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
24082  switch (BuiltinID) {
24083  default: llvm_unreachable("unexpected builtin ID");
24084  case RISCV::BI__builtin_riscv_orc_b_32:
24085  case RISCV::BI__builtin_riscv_orc_b_64:
24086  case RISCV::BI__builtin_riscv_clz_32:
24087  case RISCV::BI__builtin_riscv_clz_64:
24088  case RISCV::BI__builtin_riscv_ctz_32:
24089  case RISCV::BI__builtin_riscv_ctz_64:
24090  case RISCV::BI__builtin_riscv_clmul_32:
24091  case RISCV::BI__builtin_riscv_clmul_64:
24092  case RISCV::BI__builtin_riscv_clmulh_32:
24093  case RISCV::BI__builtin_riscv_clmulh_64:
24094  case RISCV::BI__builtin_riscv_clmulr_32:
24095  case RISCV::BI__builtin_riscv_clmulr_64:
24096  case RISCV::BI__builtin_riscv_xperm4_32:
24097  case RISCV::BI__builtin_riscv_xperm4_64:
24098  case RISCV::BI__builtin_riscv_xperm8_32:
24099  case RISCV::BI__builtin_riscv_xperm8_64:
24100  case RISCV::BI__builtin_riscv_brev8_32:
24101  case RISCV::BI__builtin_riscv_brev8_64:
24102  case RISCV::BI__builtin_riscv_zip_32:
24103  case RISCV::BI__builtin_riscv_unzip_32: {
24104  switch (BuiltinID) {
24105  default: llvm_unreachable("unexpected builtin ID");
24106  // Zbb
24107  case RISCV::BI__builtin_riscv_orc_b_32:
24108  case RISCV::BI__builtin_riscv_orc_b_64:
24109  ID = Intrinsic::riscv_orc_b;
24110  break;
24111  case RISCV::BI__builtin_riscv_clz_32:
24112  case RISCV::BI__builtin_riscv_clz_64: {
24113  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
24114  Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
24115  if (Result->getType() != ResultType)
24116  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
24117  "cast");
24118  return Result;
24119  }
24120  case RISCV::BI__builtin_riscv_ctz_32:
24121  case RISCV::BI__builtin_riscv_ctz_64: {
24122  Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
24123  Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
24124  if (Result->getType() != ResultType)
24125  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
24126  "cast");
24127  return Result;
24128  }
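  // Both paths above use the generic ctlz/cttz intrinsics with the
  // is-zero-poison flag set to false, so a zero input is well defined and
  // yields the operand width (e.g. __builtin_riscv_clz_32(0) == 32).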
24129 
24130  // Zbc
24131  case RISCV::BI__builtin_riscv_clmul_32:
24132  case RISCV::BI__builtin_riscv_clmul_64:
24133  ID = Intrinsic::riscv_clmul;
24134  break;
24135  case RISCV::BI__builtin_riscv_clmulh_32:
24136  case RISCV::BI__builtin_riscv_clmulh_64:
24137  ID = Intrinsic::riscv_clmulh;
24138  break;
24139  case RISCV::BI__builtin_riscv_clmulr_32:
24140  case RISCV::BI__builtin_riscv_clmulr_64:
24141  ID = Intrinsic::riscv_clmulr;
24142  break;
24143 
24144  // Zbkx
24145  case RISCV::BI__builtin_riscv_xperm8_32:
24146  case RISCV::BI__builtin_riscv_xperm8_64:
24147  ID = Intrinsic::riscv_xperm8;
24148  break;
24149  case RISCV::BI__builtin_riscv_xperm4_32:
24150  case RISCV::BI__builtin_riscv_xperm4_64:
24151  ID = Intrinsic::riscv_xperm4;
24152  break;
24153 
24154  // Zbkb
24155  case RISCV::BI__builtin_riscv_brev8_32:
24156  case RISCV::BI__builtin_riscv_brev8_64:
24157  ID = Intrinsic::riscv_brev8;
24158  break;
24159  case RISCV::BI__builtin_riscv_zip_32:
24160  ID = Intrinsic::riscv_zip;
24161  break;
24162  case RISCV::BI__builtin_riscv_unzip_32:
24163  ID = Intrinsic::riscv_unzip;
24164  break;
24165  }
24166 
24167  IntrinsicTypes = {ResultType};
24168  break;
24169  }
24170 
24171  // Zk builtins
24172 
24173  // Zknh
24174  case RISCV::BI__builtin_riscv_sha256sig0:
24175  ID = Intrinsic::riscv_sha256sig0;
24176  break;
24177  case RISCV::BI__builtin_riscv_sha256sig1:
24178  ID = Intrinsic::riscv_sha256sig1;
24179  break;
24180  case RISCV::BI__builtin_riscv_sha256sum0:
24181  ID = Intrinsic::riscv_sha256sum0;
24182  break;
24183  case RISCV::BI__builtin_riscv_sha256sum1:
24184  ID = Intrinsic::riscv_sha256sum1;
24185  break;
24186 
24187  // Zksed
24188  case RISCV::BI__builtin_riscv_sm4ks:
24189  ID = Intrinsic::riscv_sm4ks;
24190  break;
24191  case RISCV::BI__builtin_riscv_sm4ed:
24192  ID = Intrinsic::riscv_sm4ed;
24193  break;
24194 
24195  // Zksh
24196  case RISCV::BI__builtin_riscv_sm3p0:
24197  ID = Intrinsic::riscv_sm3p0;
24198  break;
24199  case RISCV::BI__builtin_riscv_sm3p1:
24200  ID = Intrinsic::riscv_sm3p1;
24201  break;
24202 
24203  // Zihintntl
24204  case RISCV::BI__builtin_riscv_ntl_load: {
24205  llvm::Type *ResTy = ConvertType(E->getType());
24206  unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
24207  if (Ops.size() == 2)
24208  DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
24209 
24210  llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
24211  getLLVMContext(),
24212  llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
24213  llvm::MDNode *NontemporalNode = llvm::MDNode::get(
24214  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
24215 
24216  int Width;
24217  if (ResTy->isScalableTy()) {
24218  const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
24219  llvm::Type *ScalarTy = ResTy->getScalarType();
24220  Width = ScalarTy->getPrimitiveSizeInBits() *
24221  SVTy->getElementCount().getKnownMinValue();
24222  } else
24223  Width = ResTy->getPrimitiveSizeInBits();
24224  LoadInst *Load = Builder.CreateLoad(
24225  Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
24226 
24227  Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
24228  Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
24229  RISCVDomainNode);
24230 
24231  return Load;
24232  }
24233  case RISCV::BI__builtin_riscv_ntl_store: {
24234  unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
24235  if (Ops.size() == 3)
24236  DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
24237 
24238  llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
24239  getLLVMContext(),
24240  llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
24241  llvm::MDNode *NontemporalNode = llvm::MDNode::get(
24242  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
24243 
24244  StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
24245  Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
24246  Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
24247  RISCVDomainNode);
24248 
24249  return Store;
24250  }
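  // Hedged usage sketch for the two non-temporal builtins, assuming the
  // domain constants from clang's riscv_ntlh.h header:
  //   int v = __builtin_riscv_ntl_load(p, __RISCV_NTLH_ALL_PRIVATE);
  //   __builtin_riscv_ntl_store(p, v, __RISCV_NTLH_ALL_PRIVATE);
  // Omitting the domain argument falls back to __RISCV_NTLH_ALL (5), as
  // handled above.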
24251 
24252  // Vector builtins are handled from here.
24253 #include "clang/Basic/riscv_vector_builtin_cg.inc"
24254  // SiFive Vector builtins are handled from here.
24255 #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
24256  }
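  // The generated .inc files above expand to case bodies for the RVV and
  // SiFive vector builtins; each one either returns directly or falls out of
  // the switch after setting ID, IntrinsicTypes and the NF/PolicyAttrs/
  // IsMasked state declared earlier.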
24257 
24258  assert(ID != Intrinsic::not_intrinsic);
24259 
24260  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
24261  return Builder.CreateCall(F, Ops, "");
24262 }
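A minimal caller-side sketch of the scalar path above, assuming a riscv64
target built with Zbb and Zbc enabled (e.g. -march=rv64gc_zbb_zbc); the
function name is illustrative, only the builtins come from the code above:

#include <stdint.h>

uint64_t demo(uint64_t a, uint64_t b) {
  // Lowers to llvm.ctlz.i64(a, i1 false), then truncates to the builtin's
  // unsigned-int result; defined for a == 0 (returns 64).
  unsigned lz = __builtin_riscv_clz_64(a);
  // Lowers to the overloaded llvm.riscv.clmul intrinsic on i64.
  uint64_t prod = __builtin_riscv_clmul_64(a, b);
  return prod + lz;
}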
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3278
int Depth
Definition: ASTDiff.cpp:190
DynTypedNode Node
StringRef P
static char ID
Definition: Arena.cpp:183
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
llvm::APSInt APSInt
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8491
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1347
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6538
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1776
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2213
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, unsigned FPAccuracyIntrinsicID=Intrinsic::not_intrinsic)
Definition: CGBuiltin.cpp:557
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6407
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:753
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:879
#define X86_CPU_SUBTYPE(ENUM, STR)
#define LD_CASES_AS_SCOPES(ORDER)
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1930
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:887
#define ST_VOLATILE_CASES(ADDR_SPACE)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:725
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7553
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7565
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:391
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7535
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9386
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9375
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8580
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2548
static bool hasFuncNameRequestedFPAccuracy(StringRef Name, const LangOptions &LangOpts)
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:247
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1484
#define MMA_INTR(geom_op_type, layout)
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:604
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1408
static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name)
Definition: CGBuiltin.cpp:522
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6534
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7566
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1545
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2173
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:480
#define MUTATE_LDBL(func)
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9312
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:455
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:938
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2670
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:172
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2576
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:847
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9308
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7567
@ UnsignedAlts
Definition: CGBuiltin.cpp:6501
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6506
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6510
@ Use64BitVectors
Definition: CGBuiltin.cpp:6503
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6498
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6508
@ InventFloatType
Definition: CGBuiltin.cpp:6500
@ AddRetType
Definition: CGBuiltin.cpp:6493
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6495
@ VectorizeRetType
Definition: CGBuiltin.cpp:6497
@ VectorRet
Definition: CGBuiltin.cpp:6507
@ Add1ArgType
Definition: CGBuiltin.cpp:6494
@ Use128BitVectors
Definition: CGBuiltin.cpp:6504
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9823
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9825
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7377
#define X86_VENDOR(ENUM, STRING)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:812
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:444
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:624
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6531
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:264
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8410
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static Function * getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0, unsigned FPIntrinsicID, unsigned IntrinsicID, bool HasAccuracyRequirement)
Definition: CGBuiltin.cpp:514
#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD)
#define MMA_SATF_VARIANTS(geom, type)
static CallInst * CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name, llvm::Function *FPBuiltinF, ArrayRef< Value * > Args, unsigned ID)
Definition: CGBuiltin.cpp:491
static Function * emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E, unsigned FPAccuracyIntrinsicID, unsigned IntrinsicID, llvm::Value *Src0, StringRef &Name)
Definition: CGBuiltin.cpp:529
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9349
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:236
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, unsigned FPAccuracyIntrinsicID=Intrinsic::not_intrinsic)
Definition: CGBuiltin.cpp:581
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:183
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
#define LD_VOLATILE_CASES(ADDR_SPACE)
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8512
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:695
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6543
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6296
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:744
#define ST_CASES_AS_SCOPES(ORDER)
#define CUSTOM_BUILTIN_MAPPING(x, s)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2406
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:833
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9401
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2460
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7570
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2599
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
#define INTRINSIC_WITH_CC(NAME)
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:255
MSVCIntrin
Definition: CGBuiltin.cpp:1587
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:643
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:349
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:73
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2179
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8483
@ VolatileRead
Definition: CGBuiltin.cpp:8485
@ NormalRead
Definition: CGBuiltin.cpp:8484
@ Write
Definition: CGBuiltin.cpp:8486
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:194
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:672
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2448
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:683
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:707
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9319
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:212
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:309
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7562
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:797
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1357
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6862
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:662
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
#define MMA_LDST(n, geom_op_type)
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9539
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9413
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2681
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2414
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:782
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6366
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:468
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1630
#define X86_CPU_TYPE(ENUM, STR)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1393
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9341
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7628
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7564
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7137
static Address EmitPointerWithAlignment(const Expr *E, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo, KnownNonNull_t IsKnownNonNull, CodeGenFunction &CGF)
Definition: CGExpr.cpp:1250
unsigned Offset
Definition: Format.cpp:2974
const CFGBlock * Block
Definition: HTMLLogger.cpp:153
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
const char * Data
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation End
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ long long abs(long long __n)
__DEVICE__ double powi(double __a, int __b)
__DEVICE__ int min(int __a, int __b)
__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b)
__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b)
__DEVICE__ double exp10(double __a)
__DEVICE__ int max(int __a, int __b)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1118
IdentifierTable & Idents
Definition: ASTContext.h:644
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:646
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2611
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2334
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1091
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2236
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2239
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3318
QualType getElementType() const
Definition: Type.h:3330
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2872
bool hasStoredFPFeatures() const
Definition: Expr.h:3034
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Expr.cpp:1693
FPOptionsOverride getFPFeatures() const
Definition: Expr.h:3154
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:3050
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3063
arg_range arguments()
Definition: Expr.h:3111
Expr * getCallee()
Definition: Expr.h:3022
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:3042
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1645
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
static Address invalid()
Definition: Address.h:153
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CharUnits getAlignment() const
Definition: Address.h:166
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:184
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:241
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:176
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:234
An aggregate value slot.
Definition: CGValue.h:512
Address getAddress() const
Definition: CGValue.h:652
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:869
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:886
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:397
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:345
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
All available information about a concrete callee.
Definition: CGCall.h:62
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:128
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual llvm::Value * getPipeElemAlign(const Expr *PipeArg)
virtual llvm::Value * getPipeElemSize(const Expr *PipeArg)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:257
void add(RValue rvalue, QualType type)
Definition: CGCall.h:281
Class to manage the BuiltinID for the current builtin expression during processing in EmitBuiltinExpr...
RAII object to set/unset CodeGenFunction::IsSanitizerScope.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
Definition: CGBuiltin.cpp:9832
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask >> Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
Definition: CGExpr.cpp:3515
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertType(QualType T)
BuiltinCheckKind
Specifies which type of sanitizer check to apply when handling a particular builtin.
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition: CGBuiltin.cpp:9978
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
Definition: CGCall.cpp:4943
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
Definition: CGBuiltin.cpp:9694
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition: CGBuiltin.cpp:9921
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
Definition: CGBuiltin.cpp:871
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
Definition: CGExpr.cpp:3380
llvm::LLVMContext & getLLVMContext()
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
Definition: CGBuiltin.cpp:2318
RValue EmitIntelSYCLAllocaBuiltin(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
Definition: CGBuiltin.cpp:2218
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition: CGBuiltin.cpp:9867
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition: CGBuiltin.cpp:7589
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
Definition: CGBuiltin.cpp:9710
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:7690
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
Definition: CGBuiltin.cpp:2153
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
Definition: CGCall.cpp:5099
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
Definition: CGExpr.cpp:3300
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
Definition: CGBuiltin.cpp:2583
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitIntelFPGARegBuiltin(const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:8621
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
Definition: CGBuiltin.cpp:9782
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9427
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition: CGBuiltin.cpp:6461
llvm::CallBase * addControlledConvergenceToken(llvm::CallBase *Input)
Definition: CGBuiltin.cpp:1292
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
Definition: CGExpr.cpp:2009
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
const TargetInfo & getTarget() const
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition: CGBuiltin.cpp:6454
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1964
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition: CGBuiltin.cpp:9682
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
Definition: CGBuiltin.cpp:8386
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9528
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Definition: CGExpr.cpp:3880
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
Definition: CGBuiltin.cpp:2694
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
llvm::CallInst * MaybeEmitFPBuiltinofFD(llvm::FunctionType *IRFuncTy, const SmallVectorImpl< llvm::Value * > &IRArgs, llvm::Value *FnPtr, StringRef Name, unsigned FDBuiltinID)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition: CGExpr.cpp:1381
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
Definition: CGExpr.cpp:3823
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
Definition: CGExpr.cpp:2023
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitIntelFPGAMemBuiltin(const CallExpr *E)
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
Definition: CGBuiltin.cpp:9817
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
static Destroyer destroyARCStrongPrecise
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
Definition: CGBuiltin.cpp:6352
const FieldDecl * FindFlexibleArrayMemberField(ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset)
Definition: CGBuiltin.cpp:902
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
Definition: CGBuiltin.cpp:9747
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
Given a number of pointers, inform the optimizer that they're being intrinsically used up until this ...
Definition: CGObjC.cpp:2123
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition: CGBuiltin.cpp:6433
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
const LangOptions & getLangOpts() const
This class organizes the cross-function state that is used while generating LLVM code.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
const llvm::DataLayout & getDataLayout() const
void getFPAccuracyFuncAttributes(StringRef Name, llvm::AttributeList &AttrList, llvm::Metadata *&MDs, unsigned ID, const llvm::Type *FuncType)
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:98
llvm::Module & getModule() const
llvm::LLVMContext & getLLVMContext()
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
DiagnosticsEngine & getDiags() const
ASTContext & getContext() const
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
const CodeGenOptions & getCodeGenOpts() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1636
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:351
LValue - This represents an lvalue references.
Definition: CGValue.h:181
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:370
void setNontemporal(bool Value)
Definition: CGValue.h:322
Information used when generating a structured loop.
Definition: CGLoopInfo.h:168
const LoopInfo * getParent() const
Returns the first outer loop containing this loop if any, nullptr otherwise.
Definition: CGLoopInfo.h:196
llvm::BasicBlock * getHeader() const
Get the header block of this loop.
Definition: CGLoopInfo.h:179
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:41
static RValue getIgnored()
Definition: CGValue.h:92
static RValue get(llvm::Value *V)
Definition: CGValue.h:97
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:70
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:124
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:107
An abstract representation of an aligned address.
Definition: Address.h:41
llvm::Value * getPointer() const
Definition: Address.h:65
static RawAddress invalid()
Definition: Address.h:60
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:355
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:159
Complex values, per C99 6.2.5p11.
Definition: Type.h:2886
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:3967
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:1946
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
static DeclRefExpr * Create(const ASTContext &Context, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, SourceLocation NameLoc, QualType T, ExprValueKind VK, NamedDecl *FoundD=nullptr, const TemplateArgumentListInfo *TemplateArgs=nullptr, NonOdrUseReason NOUR=NOUR_None)
Definition: Expr.cpp:488
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:601
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:227
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:413
bool hasAttr() const
Definition: DeclBase.h:585
T * getAttr() const
Definition: DeclBase.h:581
DeclContext * getDeclContext()
Definition: DeclBase.h:456
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:193
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1553
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3116
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3111
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3107
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:825
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3608
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3091
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3975
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:949
Represents a member of a struct/union/class.
Definition: Decl.h:3059
Represents a function declaration or definition.
Definition: Decl.h:1972
const TemplateArgumentList * getTemplateSpecializationArgs() const
Retrieve the template arguments used to produce this function template specialization from the primary template.
Definition: Decl.cpp:4182
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2708
Represents a prototype with parameter type info, e.g. 'int foo(int)' or 'int foo(void)'.
Definition: Type.h:4456
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5384
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that is accepted.
Definition: LangOptions.h:475
std::string FPAccuracyVal
Definition: LangOptions.h:599
FPAccuracyFuncMapTy FPAccuracyFuncMap
Definition: LangOptions.h:601
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ constructor, Objective-C selector, etc).
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
A C++ nested-name-specifier augmented with source location information.
PipeType - an OpenCL 2.0 pipe type.
Definition: Type.h:7008
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2939
QualType getPointeeType() const
Definition: Type.h:2949
A (possibly-)qualified type.
Definition: Type.h:738
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7243
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2836
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7285
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7199
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2832
The collection of all-type qualifiers we support.
Definition: Type.h:148
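A small sketch (hypothetical helper) of how these qualifier queries combine when codegen decides between ordinary and volatile or address-space-aware accesses:

  #include "clang/AST/Type.h"

  static bool needsVolatileAccess(clang::QualType T) {
    clang::Qualifiers Quals = T.getQualifiers(); // full qualifier set
    clang::LangAS AS = T.getAddressSpace();      // address space qualifier
    (void)Quals; (void)AS;
    return T.isVolatileQualified();
  }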
Represents a struct/union/class.
Definition: Decl.h:4170
field_range fields() const
Definition: Decl.h:4376
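For illustration (a hypothetical helper), fields() yields the record's FieldDecls in declaration order:

  #include "clang/AST/Decl.h"
  #include "llvm/Support/raw_ostream.h"

  static void dumpFieldNames(const clang::RecordDecl *RD) {
    for (const clang::FieldDecl *FD : RD->fields())
      llvm::outs() << FD->getName() << '\n';
  }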
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
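A sketch (hypothetical predicate) of how emitters branch on these accessors when lowering an SVE builtin:

  #include "clang/Basic/TargetBuiltins.h"

  static bool isZExtGatherLoad(const clang::SVETypeFlags &TypeFlags) {
    // Gather loads whose result must be zero-extended take a widening path.
    return TypeFlags.isGatherLoad() && TypeFlags.isZExtReturn();
  }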
static SYCLUniqueStableIdExpr * Create(const ASTContext &Ctx, SourceLocation OpLoc, SourceLocation LParen, SourceLocation RParen, Expr *E)
Definition: Expr.cpp:631
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
StringLiteral - This represents a string literal expression, e.g. "foo" or L"bar" (wide strings).
Definition: Expr.h:1773
StringRef getString() const
Definition: Expr.h:1850
bool isUnion() const
Definition: Decl.h:3792
Exposes information about the current target.
Definition: TargetInfo.h:213
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:830
bool isBigEndian() const
Definition: TargetInfo.h:1629
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1235
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the target machine you are compiling for.
Definition: TargetInfo.h:702
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:307
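A minimal sketch (hypothetical helper) of querying the target before choosing a lowering strategy:

  #include "clang/AST/ASTContext.h"
  #include "clang/Basic/TargetInfo.h"

  static bool isBigEndianWasm(const clang::ASTContext &Ctx) {
    const clang::TargetInfo &TI = Ctx.getTargetInfo();
    return TI.isBigEndian() && TI.getTriple().isWasm();
  }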
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:85
A template argument list.
Definition: DeclTemplate.h:244
const TemplateArgument & get(unsigned Idx) const
Retrieve the template argument at a given index.
Definition: DeclTemplate.h:265
QualType getAsType() const
Retrieve the type for a type template argument.
Definition: TemplateBase.h:319
llvm::APSInt getAsIntegral() const
Retrieve the template argument as an integral value.
Definition: TemplateBase.h:363
ValueDecl * getAsDecl() const
Retrieve the declaration for a declaration non-type template argument.
Definition: TemplateBase.h:326
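A sketch (hypothetical helper) of reading a specialization's first template argument, guarding on its kind before using the typed accessors above:

  #include "clang/AST/Decl.h"
  #include "clang/AST/DeclTemplate.h"
  #include <optional>

  static std::optional<clang::QualType>
  firstTypeArg(const clang::FunctionDecl *FD) {
    if (const clang::TemplateArgumentList *TAL =
            FD->getTemplateSpecializationArgs())
      if (TAL->size() > 0 &&
          TAL->get(0).getKind() == clang::TemplateArgument::Type)
        return TAL->get(0).getAsType();
    return std::nullopt;
  }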
The base class of the type hierarchy.
Definition: Type.h:1607
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
Definition: Type.cpp:1870
bool isBlockPointerType() const
Definition: Type.h:7420
bool isVoidType() const
Definition: Type.h:7723
bool isBooleanType() const
Definition: Type.h:7851
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:2134
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:666
bool isArrayType() const
Definition: Type.h:7478
bool isCountAttributedType() const
Definition: Type.cpp:683
bool isPointerType() const
Definition: Type.h:7412
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7763
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8008
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:694
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:7838
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e.g., it may be an unsigned integer type or a vector.
Definition: Type.cpp:2224
bool isBitIntType() const
Definition: Type.h:7658
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it may be a signed integer type or a vector.
Definition: Type.cpp:2174
bool isObjCObjectPointerType() const
Definition: Type.h:7544
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g., it may be a floating-point type or a vector thereof.
Definition: Type.cpp:2246
bool isFloatingType() const
Definition: Type.cpp:2237
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition: Type.cpp:2184
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:7941
bool isRecordType() const
Definition: Type.h:7506
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1874
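A sketch (hypothetical helper) contrasting getAs<> (null on mismatch) with the boolean predicates above:

  #include "clang/AST/Type.h"

  static bool isPointerToInteger(clang::QualType T) {
    if (const auto *PT = T->getAs<clang::PointerType>())
      return PT->getPointeeType()->isIntegerType();
    return false; // not a pointer at all
  }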
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition: Decl.h:707
QualType getType() const
Definition: Decl.h:718
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:3769
unsigned getNumElements() const
Definition: Type.h:3784
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
#define INT_MIN
Definition: limits.h:51
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw statement leading to stack unwinding).
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask All
Definition: XRayInstr.h:43
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
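A sketch (hypothetical driver) of computing an os_log buffer layout and packing its two header bytes, in the spirit of the __builtin_os_log_* lowering:

  #include "clang/AST/OSLog.h"

  static unsigned headerBytes(clang::ASTContext &Ctx,
                              const clang::CallExpr *CE) {
    clang::analyze_os_log::OSLogBufferLayout Layout;
    if (!clang::analyze_os_log::computeOSLogBufferLayout(Ctx, CE, Layout))
      return 0;
    // First two buffer bytes: a summary byte, then the argument count.
    return (unsigned(Layout.getSummaryByte()) << 8) |
           Layout.getNumArgsByte();
  }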
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
@ System
Like Angled, but marks system directories.
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Sub(InterpState &S, CodePtr OpPC)
Definition: Interp.h:330
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:923
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:217
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:348
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:489
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1385
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1707
bool Add(InterpState &S, CodePtr OpPC)
Definition: Interp.h:312
std::string toString(const til::SExpr *E)
AccessKind
This enum distinguishes between different ways to access (read or write) a variable.
Definition: ThreadSafety.h:75
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc), selects the name's range.
The JSON file list parser is used to communicate input to InstallAPI.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:151
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:136
const FunctionProtoType * T
CallingConv
CallingConv - Specifies the calling convention that a function uses.
Definition: Specifiers.h:275
@ Other
Other implicit parameter.
@ HiddenVisibility
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:37
unsigned long uint64_t
long int64_t
half clamp(half, half, half)
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
char __ovld __cnfn rotate(char, char)
For each element in v, the bits are shifted left by the number of bits given by the corresponding element of i.
void __ovld prefetch(const __global char *, size_t)
Prefetch num_elements * sizeof(gentype) bytes into the global cache.
#define ptrauth_strip(__value, __key)
Definition: ptrauth.h:139
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
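A sketch (hypothetical helper) of masking sanitizers around code that must not be instrumented:

  #include "clang/Basic/Sanitizers.h"

  static void maskSanitizers(clang::SanitizerSet &SanOpts, bool DisableAll) {
    if (DisableAll)
      SanOpts.clear();                                     // everything off
    else
      SanOpts.set(clang::SanitizerKind::Alignment, false); // one check off
  }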
uint64_t Width
Definition: ASTContext.h:153
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742
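These <tgmath.h> macros give C a type-generic spelling for each libm function; a C++ analogue (a sketch using the <cmath> overloads, not these macros) shows the same dispatch on argument type:

  #include <cmath>

  static void demo() {
    float  RootF = std::sqrt(2.0f); // selects the float overload
    double RootD = std::sqrt(2.0);  // selects the double overload
    (void)RootF; (void)RootD;
  }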